
Commit 96f4c5c

Update
1 parent 04aaf87 commit 96f4c5c

File tree: 3 files changed (+174, -101 lines)


README.md

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+# Instagram OSINT Tool
+
+The Instagram OSINT Tool gathers a range of information from an Instagram
+account that you normally wouldn't be able to get just by looking at the
+profile.
+
+The information includes:
+
+1. Username
+2. Profile Name
+3. URL
+4. Followers
+5. Following
+6. Number of Posts
+7. Bio
+8. Profile Picture URL
+9. Is Business Account?
+10. Connected to a FB account?
+11. External URL
+12. Joined Recently?
+13. Business Category Name
+14. Is Private?
+15. Is Verified?
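
The README stops short of showing how to run the tool. Based on the required `--username` flag that insta_info.py defines below, a usage sketch (not text from this commit):

```
python3 insta_info.py --username <username>
```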

banner.py

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+banner = '''
+
+██╗███╗   ██╗███████╗████████╗ █████╗  ██████╗ ██████╗  █████╗ ███╗   ███╗ ██████╗ ███████╗██╗███╗   ██╗████████╗
+██║████╗  ██║██╔════╝╚══██╔══╝██╔══██╗██╔════╝ ██╔══██╗██╔══██╗████╗ ████║ ██╔═══██╗██╔════╝██║████╗  ██║╚══██╔══╝
+██║██╔██╗ ██║███████╗   ██║   ███████║██║  ███╗██████╔╝███████║██╔████╔██║ ██║   ██║███████╗██║██╔██╗ ██║   ██║
+██║██║╚██╗██║╚════██║   ██║   ██╔══██║██║   ██║██╔══██╗██╔══██║██║╚██╔╝██║ ██║   ██║╚════██║██║██║╚██╗██║   ██║
+██║██║ ╚████║███████║   ██║   ██║  ██║╚██████╔╝██║  ██║██║  ██║██║ ╚═╝ ██║ ╚██████╔╝███████║██║██║ ╚████║   ██║
+╚═╝╚═╝  ╚═══╝╚══════╝   ╚═╝   ╚═╝  ╚═╝ ╚═════╝ ╚═╝  ╚═╝╚═╝  ╚═╝╚═╝     ╚═╝  ╚═════╝ ╚══════╝╚═╝╚═╝  ╚═══╝   ╚═╝
+
+
+'''

insta_info.py

Lines changed: 137 additions & 101 deletions
@@ -1,7 +1,15 @@
 #! /usr/bin/env python3
-#Instagram Scraper
+# Instagram Scraper
+import argparse
+from banner import banner
 from bs4 import BeautifulSoup
-import requests, random, sys, json, time, os, argparse
+import json
+import os
+import requests
+import random
+import string
+import sys
+import time
 
 
 class colors:
@@ -17,149 +25,180 @@ class colors:
 
 class Scraper:
 
-
-    def __init__(self):
-        self.user_agents = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
-                            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
-                            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
-                            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
-                            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
-                            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
-                            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
-                            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
-                            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
-                            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0']
+    def __init__(self, username):
+        self.user_agents = [
+            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
+            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
+            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
+            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
+            'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
+            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
+            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
+            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
+            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
+            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0']
         self.profile_data = {}
+        self.username = username
+        self.make_directory()
+        self.scrape(self.username)
+        self.print_data()
+        self.save_data()
 
 
-    def convert_to_int(self, num: str):
-        '''Converts values like 11.9k to 11900 because instagram shortens
-        their follower count, this currently does not work and idk how to fix it'''
-        if "k" in num:
-            #Find the first few digits that should be * 1000
-            try:
-                front = int(num[:num.index('.')])
-                back = int(num[num.index('.')+1])
-            except ValueError:
-                return (front *1000)
-
-            return (front * 1000) + (back * 100)
-
-        elif "m" in num:
-            try:
-                front = int(num[:num.index('.')])
-                back = int(num[num.index('.')+1])
-            except ValueError:
-                return (front * 1000000)
-
-            return (front * 1000000) + (back*100000)
-
-        else:
-            return int(num.replace(',', ''))
-
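
The removed convert_to_int above admitted in its own docstring that it "currently does not work": for counts without a decimal point such as "12k", num.index('.') raises ValueError, and the except handler then reads the still-unbound front, raising UnboundLocalError. A corrected sketch of what the helper was trying to do, assuming the same "11.9k"/"1.2m" input format (not part of this commit):

```python
def convert_to_int(num: str) -> int:
    """Convert abbreviated counts like '11.9k' or '1.2m' to plain integers."""
    num = num.lower().replace(',', '')            # '1,234' -> '1234'
    if num.endswith('k'):
        return int(float(num[:-1]) * 1_000)       # '11.9k' -> 11900
    if num.endswith('m'):
        return int(float(num[:-1]) * 1_000_000)   # '1.2m'  -> 1200000
    return int(num)
```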

-    def scrape(self,username:str):
-        '''Takes a username as a string to find information about that person's instagram profile, a random
-        user agent is picked to spoof when the data is collected'''
-        time.sleep(2)
-        #Get the html data with the requests module
+    def scrape(self, username: str):
+        """Takes a username as a string and finds information about that person's Instagram profile; a random
+        user agent is picked to spoof when the data is collected
+        :return: none
+        :param: username: str
+        """
+        print("[*] Starting Scan")
+        # Get the html data with the requests module
         r = requests.get(f'http://instagram.com/{username}', headers={'User-Agent': random.choice(self.user_agents)})
-        soup = BeautifulSoup(r.text,'html.parser')
-        #Find the tags that hold the data we want to parse
-        general_data = soup.find_all('meta',attrs={'property':'og:description'})
-        more_data = soup.find_all('script',attrs={'type':'text/javascript'})
+        soup = BeautifulSoup(r.text, 'html.parser')
+        # Find the tags that hold the data we want to parse
+        general_data = soup.find_all('meta', attrs={'property': 'og:description'})
+        more_data = soup.find_all('script', attrs={'type': 'text/javascript'})
         description = soup.find('script', attrs={'type': 'application/ld+json'})
-        #Try to parse the nessicary content but if it fails, then user != exist
+        # Try to parse the necessary content; if it fails, the user doesn't exist
         try:
             text = general_data[0].get('content').split()
-            #Get the json data held inside of the <script> type="applicaiton/il/json"
+            # Get the json data held inside of the <script type="application/ld+json">
             description = json.loads(description.get_text())
             profile_meta = json.loads(more_data[3].get_text()[21:].strip(';'))
 
         except:
             print(colors.FAIL + f"Username {username} not found" + colors.ENDC)
             sys.exit()
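
The profile_meta line above relies on the page embedding a script of the form `window._sharedData = {...};`: the `[21:]` slice drops the 21-character `window._sharedData = ` prefix, and `.strip(';')` removes the trailing semicolon before json.loads. A minimal self-contained sketch of that extraction, with a made-up stand-in for the JSON that more_data[3] held:

```python
import json

# Stand-in for the <script type="text/javascript"> body the scraper grabs
# as more_data[3]; real profile pages embedded the full profile JSON here.
script_body = 'window._sharedData = {"entry_data": {"ProfilePage": []}};'

# Drop the 21-char 'window._sharedData = ' prefix and the trailing ';'.
profile_meta = json.loads(script_body[21:].strip(';'))
print(profile_meta['entry_data'])  # {'ProfilePage': []}
```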

-        #If the user does not have anything in their bio, the value will not be in the json dump
-        #So we just set the bio to an empty string
+        # If the user does not have anything in their bio, the value will not be in the json dump
+        # So we just set the bio to an empty string
+        # I don't know if I still need this try/catch block atm
         try:
-            self.profile_data = {"Username": text[-1], "Profile name": description['name'], "URL": description['mainEntityofPage']['@id'],
-                                 "Followers": text[0], "Following": text[2], "Posts": text[4], "Bio": description['description'],
-                                 "profile_pic_url": profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['profile_pic_url_hd'],
-                                 "is_business_account": profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_business_account'],
-                                 "connected_to_fb": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['connected_fb_page']),
-                                 "externalurl": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['external_url']),
-                                 "joined_recently": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_joined_recently']),
-                                 "business_category_name": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['business_category_name']),
-                                 "is_private": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_private']),
-                                 "is_verified": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_verified'])}
+            self.profile_data = {
+                "Username": text[-1],
+                "Profile name": description['name'],
+                "URL": description['mainEntityofPage']['@id'],
+                "Followers": text[0], "Following": text[2], "Posts": text[4],
+                "Bio": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['biography']),
+                "profile_pic_url": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['profile_pic_url_hd']),
+                "is_business_account": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_business_account']),
+                "connected_to_fb": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['connected_fb_page']),
+                "externalurl": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['external_url']),
+                "joined_recently": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_joined_recently']),
+                "business_category_name": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['business_category_name']),
+                "is_private": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_private']),
+                "is_verified": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_verified'])}
         except KeyError:
-            profile_data = {"Username": text[-1], "Profile name": description['name'],
-                            "Followers": text[0], "Following": text[2], "Posts": text[4], "Bio": '', "URL": description['mainEntityofPage']['@id'], "ProfilePictureURL": description['image']}
+            self.profile_data = {
+                "Username": text[-1],
+                "Profile name": description['name'],
+                "URL": description['mainEntityofPage']['@id'],
+                "Followers": text[0], "Following": text[2], "Posts": text[4],
+                "Bio": '',
+                "profile_pic_url": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['profile_pic_url_hd']),
+                "is_business_account": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_business_account']),
+                "connected_to_fb": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['connected_fb_page']),
+                "externalurl": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['external_url']),
+                "joined_recently": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_joined_recently']),
+                "business_category_name": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['business_category_name']),
+                "is_private": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_private']),
+                "is_verified": str(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['is_verified'])}
+        # Tries to scrape posts if it is a public profile
+        self.scrape_posts(profile_meta)
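
Both branches above repeat the long profile_meta['entry_data']['ProfilePage'][0]['graphql']['user'] lookup for every field. A sketch of the same mapping with the user node bound once; build_profile_data is a hypothetical helper, not part of this commit:

```python
def build_profile_data(profile_meta: dict) -> dict:
    """Hypothetical helper: bind the deeply nested user node once
    instead of repeating the full path for every field."""
    user = profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']
    return {
        "Bio": str(user['biography']),
        "profile_pic_url": str(user['profile_pic_url_hd']),
        "is_business_account": str(user['is_business_account']),
        "connected_to_fb": str(user['connected_fb_page']),
        "externalurl": str(user['external_url']),
        "joined_recently": str(user['is_joined_recently']),
        "business_category_name": str(user['business_category_name']),
        "is_private": str(user['is_private']),
        "is_verified": str(user['is_verified']),
    }
```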
+
+
+    def scrape_posts(self, profile_meta: dict):
+        """Scrapes all posts and downloads thumbnails when necessary
+        :return: none
+        """
+        if self.profile_data['is_private'].lower() == 'true':
+            print("Private profile, cannot scrape photos!")
+        else:
+            posts = {}
+            for index, post in enumerate(profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges']):
+                posts[index] = {"Caption": str(post['node']['edge_media_to_caption']['edges'][0]['node']['text']),
+                                "Number of Comments": str(post['node']['edge_media_to_comment']['count']),
+                                "Comments Disabled": str(post['node']['comments_disabled']),
+                                "Taken At Timestamp": str(post['node']['taken_at_timestamp']),
+                                "Number of Likes": str(post['node']['edge_liked_by']['count']),
+                                "Location": str(post['node']['location']),
+                                "Accessibility Caption": str(post['node']['accessibility_caption'])}
+
+                # Download the thumbnails of the post
+                for url in post['node']['thumbnail_resources']:
+                    # Save each thumbnail under a random (1-9 character) filename
+                    with open(''.join([random.choice(string.ascii_uppercase) for x in range(random.randint(1, 9))]) + '.jpg', 'wb') as f:
+                        # Delay the requests randomly (be nice to Instagram)
+                        time.sleep(random.randint(2, 16))
+                        r = requests.get(url['src'])
+                        f.write(r.content)
+                        print("Got an Image")
+
+            with open('posts.txt', 'w') as f:
+                f.write(json.dumps(posts))
 
-        self.print_data()
-        self.make_directory()
-        self.download_profile_picture()
-        self.save_data()
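
One caveat in the new scrape_posts: each thumbnail is saved under a random 1-9 character uppercase name, so two downloads can draw the same name and silently overwrite each other. A collision-free alternative that derives names from the post and thumbnail indices; save_thumbnails is a hypothetical helper, not part of this commit:

```python
import requests

def save_thumbnails(post: dict, index: int) -> None:
    """Hypothetical helper: deterministic names like post0_thumb2.jpg
    cannot collide, unlike short random filenames."""
    for n, thumb in enumerate(post['node']['thumbnail_resources']):
        r = requests.get(thumb['src'])
        with open(f'post{index}_thumb{n}.jpg', 'wb') as f:
            f.write(r.content)
```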

     def make_directory(self):
-        """Makes the profile directory of the profile being searched
+        """Makes the profile directory and changes the cwd to it
         :return: True
         """
         try:
-            os.mkdir(self.profile_data['Username'])
+            os.mkdir(self.username)
+            os.chdir(self.username)
         except FileExistsError:
-            print("Error, directory exists!")
-            sys.exit()
-
+            os.chdir(self.username)
 
     def save_data(self):
         """Saves the data to the uname directory
         :return: none
         :param: none
         """
-        os.chdir(self.profile_data['Username'])
-        with open('data.txt','w') as f:
+        with open('data.txt', 'w') as f:
             f.write(json.dumps(self.profile_data))
+        self.download_profile_picture()
         print(f"Saved data to directory {os.getcwd()}")
 
         return True
 
     def print_data(self):
-        """Prints out the data to the screen
+        """Prints out the data to the screen by iterating through the dict's keys and values
         :return: True
         """
-        #Print the data out to the user
+        # Print the data out to the user
         print(colors.HEADER + "---------------------------------------------" + colors.ENDC)
         print(colors.OKGREEN + f"Results: scan for {self.profile_data['Username']} on instagram" + colors.ENDC)
-        print(f"""Username:{self.profile_data["Username"]}""")
-        print(f"URL:{self.profile_data['URL']}")
-        print(f"Profile name: {self.profile_data['Profile name']}")
-        print(f"Followers:{self.profile_data['Followers']}")
-        print(f"Following:{self.profile_data['Following']}")
-        print(f"Posts:{self.profile_data['Posts']}")
-        #If the user does not have anything in their bio, the value will not be in the json dump
-        #So we just set the bio to an empty string
-        try:
-            print(f"Profile Bio:{self.profile_data['Bio']}")
-        except KeyError:
-            self.profile_data['Bio'] = ''
-            print("Profile Bio: ''")
-        print("")
-
+        for key, value in self.profile_data.items():
+            print(key + ':' + value)
 
     def download_profile_picture(self):
         """Downloads the profile pic and saves it to the directory
         :return: none
         :param: none
         """
-        os.chdir(self.profile_data['Username'])
-        with open("profile_pic.jpg","wb") as f:
+        with open("profile_pic.jpg", "wb") as f:
             r = requests.get(self.profile_data['profile_pic_url'])
             f.write(r.content)
 
 
-
 def parse_args():
     parser = argparse.ArgumentParser(description="Instagram OSINT tool")
     parser.add_argument("--username", help="profile username", required=True, nargs=1)
@@ -168,16 +207,13 @@ def parse_args():
 
 def main():
     args = parse_args()
+    print(banner)
     if args.username == '':
         print("Please enter the username")
         sys.exit()
     else:
-        s = Scraper()
-        s.scrape(args.username[0])
+        s = Scraper(args.username[0])
 
 
 if __name__ == '__main__':
-    main()
-
-
-
+    main()
