Skip to content

Commit 73f20e2

Browse files
committed
Add new features, paths, partial downloads.
* Add possibility to specify path for chrome * Add possibility to specify path for input and output files * Add possibility for filler to only download new data
1 parent 7ab4eff commit 73f20e2

File tree

3 files changed

+74
-20
lines changed

3 files changed

+74
-20
lines changed

fetcher.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,34 @@
1111

1212
import time
1313
import csv
14+
import argparse
1415
from datetime import datetime
1516
from selenium.webdriver import Chrome, ChromeOptions
1617
from selenium.webdriver.common.by import By
1718

1819
# Address of the HackerOne Hacktivity overview page.
hacktivity_url = 'https://hackerone.com/hacktivity/overview'
1920
# NOTE(review): presumably how long to wait for a page to load; the unit and
# the place where this is used are outside the visible part of the file - confirm.
page_loading_timeout = 10
2021

22+
def create_argument_parser():
    """Build the command-line parser for the fetcher script.

    Recognized options:
        --browser-binary: path to the Chrome or Chromium executable. The
            default is the standard macOS Google Chrome location, so users on
            other platforms have to pass this option explicitly.
        --input-data-file: path to the input data file (default ``data.csv``).
        --output-data-file: path to the output data file (default
            ``data.csv``).

    Returns:
        argparse.ArgumentParser: the configured parser; call ``parse_args()``
        on it to obtain ``browser_binary``, ``input_data_file`` and
        ``output_data_file``.
    """
    # Show the defaults in ``--help``: the browser default is platform
    # specific and both file options silently point at the same file.
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    argparser.add_argument(
        '--browser-binary',
        type=str,
        help='Path to browser binary (Chrome or Chromium)',
        default='/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
    )
    argparser.add_argument(
        '--input-data-file',
        type=str,
        help='Path to input data file',
        default='data.csv',
    )
    argparser.add_argument(
        '--output-data-file',
        type=str,
        help='Path to output data file',
        default='data.csv',
    )
    return argparser
2142

2243
def extract_reports(raw_reports):
2344
reports = []
@@ -46,15 +67,15 @@ def extract_reports(raw_reports):
4667
return reports
4768

4869

49-
def fetch():
70+
def fetch(commandline_args):
5071
options = ChromeOptions()
51-
options.binary_location = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
72+
options.binary_location = commandline_args.browser_binary
5273
options.add_argument('no-sandbox')
5374
options.add_argument('headless')
5475
driver = Chrome(options=options)
5576

5677
reports = []
57-
with open('data.csv', 'r', newline='', encoding='utf-8') as file:
78+
with open(commandline_args.input_data_file, 'r', newline='', encoding='utf-8') as file:
5879
reader = csv.DictReader(file)
5980
for row in reader:
6081
reports.append(dict(row))
@@ -93,12 +114,14 @@ def fetch():
93114
finally:
94115
driver.close()
95116

96-
with open('data.csv', 'w', newline='', encoding='utf-8') as file:
117+
with open(commandline_args.output_data_file, 'w', newline='', encoding='utf-8') as file:
97118
keys = reports[0].keys()
98119
writer = csv.DictWriter(file, fieldnames=keys)
99120
writer.writeheader()
100121
writer.writerows(reports)
101122

102123

103124
if __name__ == '__main__':
    # Script entry point: parse the command-line options and run the fetcher
    # with them.
    parser = create_argument_parser()
    args = parser.parse_args()
    fetch(args)

filler.py

Lines changed: 44 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -11,37 +11,67 @@
1111
import csv
1212
import requests
1313
import time
14+
import argparse
1415

15-
def fill():
16-
reports = []
17-
with open('data.csv', 'r', newline='', encoding='utf-8') as file:
16+
def create_argument_parser():
    """Build the command-line parser for the filler script.

    Recognized options:
        --update-all: flag; when given, every report is updated instead of
            only the ones that have no title yet.
        --input-data-file: path to the input data file (default ``data.csv``).
        --output-data-file: path to the output data file (default
            ``data.csv``).

    Returns:
        argparse.ArgumentParser: the configured parser; call ``parse_args()``
        on it to obtain ``update_all``, ``input_data_file`` and
        ``output_data_file``.
    """
    # Show the defaults in ``--help`` so it is visible that both file options
    # point at the same file unless overridden.
    argparser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # ``store_true`` already implies a default of False.
    argparser.add_argument(
        '--update-all',
        action='store_true',
        help='Update all reports',
    )
    argparser.add_argument(
        '--input-data-file',
        type=str,
        help='Path to input data file',
        default='data.csv',
    )
    argparser.add_argument(
        '--output-data-file',
        type=str,
        help='Path to output data file',
        default='data.csv',
    )
    return argparser
37+
38+
def _report_fields_from_json(json_info):
    """Extract the columns that ``fill`` populates from a report's JSON payload.

    Raises KeyError/TypeError/ValueError when the payload lacks an expected
    key or holds a value that cannot be converted; the caller treats that as
    a failed report.
    """
    if json_info['has_bounty?']:
        bounty = float(json_info.get('bounty_amount', '0'))
    else:
        bounty = 0.0
    return {
        'title': json_info['title'],
        'program': json_info['team']['profile']['name'],
        'upvotes': int(json_info['vote_count']),
        'bounty': bounty,
        'vuln_type': json_info['weakness']['name'] if 'weakness' in json_info else '',
    }


def fill(commandline_args):
    """Download details for reports in the input CSV and write the result.

    Rows whose ``title`` is empty (or all rows when ``update_all`` is set) are
    looked up at ``https://<link>.json`` and get their ``title``, ``program``,
    ``upvotes``, ``bounty`` and ``vuln_type`` columns filled in. The output
    file lists the rows that were selected for download first, followed by
    the rows that were left untouched.

    Args:
        commandline_args: object with the attributes ``input_data_file``,
            ``output_data_file`` and ``update_all`` (as produced by the
            script's argument parser).

    A report whose download or parsing fails is reported on stdout and kept
    unchanged, so it is picked up again on the next run.
    """
    # Without a timeout a stalled connection would block the run forever.
    request_timeout = 30  # seconds

    fetched_reports = []
    new_reports = []
    with open(commandline_args.input_data_file, 'r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)
        # Remember the header so it survives even when there are no rows.
        header = list(reader.fieldnames or [])
        for row in reader:
            # A missing title (short row) counts as "not fetched yet" too.
            if commandline_args.update_all or not row.get('title'):
                new_reports.append(dict(row))
            else:
                fetched_reports.append(dict(row))

    count_of_reports = len(new_reports)
    for number, report in enumerate(new_reports, start=1):
        time.sleep(0.5)  # throttle requests
        print(f'Fetching report {number} out of {count_of_reports}')
        report_url = 'https://' + report['link'] + '.json'
        try:
            json_info = requests.get(report_url, timeout=request_timeout).json()
            # Apply all fields at once: a payload that fails half-way must not
            # leave a title behind, or the report would never be retried.
            report.update(_report_fields_from_json(json_info))
        except Exception as err:
            # Best effort: one bad report must not abort the whole run.
            print(f'error at report {number}', err)
            continue

        print(report)

    reports = new_reports + fetched_reports
    # Header columns first, then any column that only appears in some rows,
    # so DictWriter never meets a key it does not know.
    fieldnames = dict.fromkeys(header)
    for report in reports:
        fieldnames.update(dict.fromkeys(report))

    with open(commandline_args.output_data_file, 'w', newline='', encoding='utf-8') as file:
        if fieldnames:
            writer = csv.DictWriter(file, fieldnames=list(fieldnames))
            writer.writeheader()
            writer.writerows(reports)
4472

4573

4674
if __name__ == '__main__':
    # Script entry point: parse the command-line options and run the filler
    # with them.
    parser = create_argument_parser()
    args = parser.parse_args()
    fill(args)

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
selenium
requests

0 commit comments

Comments
 (0)