|
| 1 | +from selenium import webdriver |
| 2 | +from time import sleep |
| 3 | +from selenium.webdriver.common.keys import Keys |
| 4 | +from selenium.common.exceptions import NoSuchElementException |
| 5 | +import pandas as pd |
| 6 | + |
| 7 | + |
def get_users(user_type, insta_id, max_stalled_rounds=20, stall_pause=0.5):
    """Return the text of every entry in a profile's followers/following dialog.

    Relies on the module-level ``browser`` (a Selenium WebDriver) currently
    showing the profile page of ``insta_id``. The dialog is opened, scrolled
    until all entries are loaded (or loading stalls), and the browser is then
    navigated back to the profile page.

    Args:
        user_type: ``"followers"`` or ``"following"``; used to build the
            ``/<insta_id>/<user_type>/`` link selector.
        insta_id: Username of the profile being scraped.
        max_stalled_rounds: Number of consecutive scroll attempts that may
            load no new entries before scrolling is abandoned.
        stall_pause: Seconds to wait after a scroll attempt that loaded
            nothing new, giving the page time to fetch more entries.

    Returns:
        A list of strings, one per ``li`` entry found in the dialog. May be
        shorter than the advertised count if loading stalled.

    Raises:
        NoSuchElementException: If the link or the dialog elements are not
            found (propagated from Selenium).
        ValueError: If the count shown on the link is not a plain integer.
    """
    # Link on the profile page that shows the count and opens the dialog.
    button = browser.find_element_by_css_selector(
        "a[href='/" + insta_id + "/" + user_type + "/']")
    # The first token of the link text is the count; drop thousands
    # separators so that values such as "1,234" parse.
    total = int(button.text.strip().split()[0].replace(",", ""))
    button.click()

    # Entries currently rendered inside the dialog's list container.
    users = browser.find_element_by_class_name(
        "PZuss").find_elements_by_css_selector("li")

    view = browser.find_element_by_class_name("isgrP")
    action_chain = webdriver.ActionChains(browser)
    # Queued so that the first perform() targets the dialog before scrolling.
    # NOTE(review): queued actions appear to be replayed on every perform()
    # unless reset_actions() is called - confirm whether that is intended.
    action_chain.context_click(on_element=view)

    stalled = 0
    # Stop once everything is loaded, or when repeated scrolling no longer
    # produces new entries (otherwise this loop could spin forever).
    while len(users) < total and stalled < max_stalled_rounds:
        # SPACE scrolls the dialog down, which triggers loading of more rows.
        action_chain.key_down(Keys.SPACE).key_up(Keys.SPACE).perform()
        previous = len(users)
        users = browser.find_element_by_class_name(
            "PZuss").find_elements_by_css_selector("li")
        if len(users) > previous:
            stalled = 0
        else:
            stalled += 1
            sleep(stall_pause)

    # Read the text before navigating away; the elements go stale afterwards.
    names = [user.text for user in users]
    browser.get("https://www.instagram.com/" + insta_id)
    return names
| 47 | + |
| 48 | + |
def _split_user_entry(entry):
    """Split one scraped entry into ``(account, name)``; name may be empty."""
    lines = entry.split("\n")
    account = lines[0]
    name = lines[1] if len(lines) > 1 else ""
    return account, name


def convert_to_csv(followers, following, insta_id):
    """Write followers and following side by side to ``<insta_id>.csv``.

    Each entry is a newline-separated string whose first line is taken as
    the account and whose second line (if any) is taken as the name. Both
    lists are parsed the same way. Row ``i`` of the CSV holds the ``i``-th
    follower and the ``i``-th following entry; the shorter list is padded
    with empty strings.

    Args:
        followers: List of scraped follower entry strings.
        following: List of scraped following entry strings.
        insta_id: Used as the output file name, with ``.csv`` appended.
    """
    columns = [
        "Follower Account",
        "Follower Name",
        "Following Account",
        "Following Name",
    ]
    blank = ("", "")
    rows = []
    for i in range(max(len(followers), len(following))):
        follower_account, follower_name = (
            _split_user_entry(followers[i]) if i < len(followers) else blank)
        following_account, following_name = (
            _split_user_entry(following[i]) if i < len(following) else blank)
        rows.append({
            "Follower Account": follower_account,
            "Follower Name": follower_name,
            "Following Account": following_account,
            "Following Name": following_name,
        })
    # Passing the columns explicitly keeps the header even when both lists
    # are empty.
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(insta_id + ".csv", index=False)
| 76 | + |
| 77 | + |
if __name__ == "__main__":
    # Imported here because only the interactive entry point needs it.
    from getpass import getpass

    username = input("Enter your Instagram username: ")
    # getpass keeps the password from being echoed to the terminal.
    password = getpass("Enter your Instagram password: ")
    insta_id = input("Enter user's Instagram username for scraping: ")

    # Path to chromedriver; raw string so the backslashes are not treated
    # as escape sequences.
    PATH = r"C:\Program Files (x86)\chromedriver.exe"
    # Module-level on purpose: get_users() reads this global.
    browser = webdriver.Chrome(PATH)
    try:
        # Let element lookups wait up to 5 seconds for the page to render.
        browser.implicitly_wait(5)
        browser.get("https://www.instagram.com/")

        username_input = browser.find_element_by_css_selector(
            "input[name='username']")
        password_input = browser.find_element_by_css_selector(
            "input[name='password']")

        # Fill in the credentials and submit the login form.
        username_input.send_keys(username)
        password_input.send_keys(password)
        login_button = browser.find_element_by_xpath(
            "//button[@type='submit']")
        login_button.click()

        # Dismiss the dialog that follows login.
        not_now_button = browser.find_element_by_xpath(
            "//button[text()='Not Now']")
        not_now_button.click()
        sleep(2)

        # Open the target profile page.
        browser.get("https://www.instagram.com/" + insta_id)
        sleep(2)

        try:
            followers = get_users("followers", insta_id)
            following = get_users("following", insta_id)
            convert_to_csv(followers, following, insta_id)
        except NoSuchElementException:
            # The followers/following link or dialog could not be located.
            print("Invalid Account/ Private Account")
    finally:
        # Always close the browser and end the driver session.
        browser.quit()
0 commit comments