Skip to content

Commit d6613cf

Browse files
authored
Fix GitHub fetch contribution script (HarshCasper#1237)
1 parent 201ae63 commit d6613cf

File tree

1 file changed

+161
-161
lines changed

1 file changed

+161
-161
lines changed
Lines changed: 161 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -1,170 +1,170 @@
import argparse

import pandas as pd
import requests
from bs4 import BeautifulSoup
from lxml import html
from tabulate import tabulate

89

910
class Fetch_PullRequests:
    """
    Fetches the pull requests of a user in a organization.

    Scrapes github.com HTML pages (no API token needed): first the
    organisation's repository listing, then each repository's pull-request
    search page filtered by author.
    """

    def __init__(self, username, organization, filename):
        """
        :param username: github user whose pull requests are collected
        :param organization: Organisation name
        :param filename: output filename (without the ``.md`` extension), it's optional
        """
        # Paginated repository listing for the organisation.
        self.ORG_URL = f"https://github.com/orgs/{organization}/repositories"
        # Base URL used to build per-repository pull-request pages.
        self.URL = f"https://github.com/{organization}"
        self.organization = organization
        self.username = username
        self.filename = filename

    def _list_of_repositories(self):
        """
        Function lists the repositories of the organisation.

        Returns
        -------
        list
            lists the repositories

        """
        page = requests.get(self.ORG_URL)
        tree = html.fromstring(page.content)
        # The paginator's <em> carries the total page count; absent when
        # everything fits on one page.
        number_of_pages = tree.xpath('//*[@id="org-repositories"]/div/div/div[2]/div/em/@data-total-pages')
        Repositories = []
        if len(number_of_pages) == 0:
            # Single page of repositories.
            Repositories.extend(tree.xpath(
                '//*[contains(concat( " ", @class, " " ), concat( " ", "wb-break-all", " " ))]//*[contains(concat( " ", @class, " " ), concat( " ", "d-inline-block", " " ))]/text()'))
        else:
            # Walk every listing page.
            for number in range(1, int(number_of_pages[0]) + 1):
                page_ = requests.get(self.ORG_URL + f"?page={number}")
                tree = html.fromstring(page_.content)
                Repositories.extend(tree.xpath(
                    '//*[contains(concat( " ", @class, " " ), concat( " ", "wb-break-all", " " ))]//*[contains(concat( " ", @class, " " ), concat( " ", "d-inline-block", " " ))]/text()'))

        # Deduplicate and strip surrounding whitespace from the scraped names.
        return list(pd.Series(list(set(Repositories))).str.strip().values)

    def _extract_pullrequests(self, repo):
        """
        Function fetches the pull request of a repo.

        Parameters
        ----------
        repo: str
            repository name

        Returns
        -------
        pandas dataframe
            dataframe consists of columns - "Title to PR", "Link of PR", "Status(Merged/Closed/Open)"

        """
        # initializing the lists to store the title, link and status of the pull request
        Title = []
        Link = []
        Status = []
        URL = self.URL + f"/{repo}/pulls?q=is%3Apr+author%3A{self.username}"
        page = requests.get(URL)
        tree = html.fromstring(page.content)
        # to determine the number of pages
        number_of_pages = tree.xpath('//*[@id="repo-content-pjax-container"]/div/div[6]/div/em/@data-total-pages')

        if len(number_of_pages) == 0:
            # Title may contain text in <code> tags. So, to handle it we use
            # BeautifulSoup (tag.text flattens nested markup) instead of the
            # xpath text() extraction used for links/status.
            soup = BeautifulSoup(page.text, 'html.parser')
            for tag in soup.find_all('a', attrs={'class': 'markdown-title'}):
                Title.append(tag.text.strip())
            Link.extend(
                tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "markdown-title", " " ))]/@href'))
            Status.extend(tree.xpath(
                '//*[contains(concat( " ", @class, " " ), concat( " ", "pl-3", " " ))]/span/@aria-label'))

        else:
            for number in range(1, int(number_of_pages[0]) + 1):
                URL = self.URL + f"/{repo}/pulls?page={number}&q=is%3Apr+author%3A{self.username}"
                page = requests.get(URL)
                tree = html.fromstring(page.content)

                soup = BeautifulSoup(page.text, 'html.parser')

                for tag in soup.find_all('a', attrs={'class': 'markdown-title'}):
                    Title.append(tag.text.strip())
                Link.extend(tree.xpath(
                    '//*[contains(concat( " ", @class, " " ), concat( " ", "markdown-title", " " ))]/@href'))
                Status.extend(tree.xpath(
                    '//*[contains(concat( " ", @class, " " ), concat( " ", "pl-3", " " ))]/span/@aria-label'))

        Data = {
            "Title to PR": Title,
            "Link of PR": Link,
            "Status(Merged/Closed/Open)": Status
        }

        # creating a dataframe with the above dictionary
        dataframe = pd.DataFrame.from_dict(Data)

        # make necessary changes to the columns of dataframe before returning it
        # aria-label reads e.g. "Merged pull request" -> keep only the status word
        dataframe['Status(Merged/Closed/Open)'] = dataframe['Status(Merged/Closed/Open)'].astype(str).str.replace(
            " pull request",
            "", regex=False)
        if dataframe['Link of PR'].dtype != 'O':
            # empty column defaults to a numeric dtype; coerce so the
            # string concatenation below does not raise
            dataframe['Link of PR'] = dataframe['Link of PR'].astype(str)
        # hrefs are site-relative; turn them into absolute URLs
        dataframe['Link of PR'] = 'https://github.com' + dataframe['Link of PR']

        return dataframe

    def get_pullrequests(self):
        """
        Function pass the repo parameter to the "_extract_pullrequests" to fetch the pull requests of the particular repo.

        Returns
        -------
        str
            return str saying that the file is stored if markdown is not empty.

        """
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # collect the per-repo frames and concatenate once instead.
        frames = [self._extract_pullrequests(repo) for repo in self._list_of_repositories()]
        dataframe = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

        markdown = dataframe.to_markdown()

        if len(markdown) > 0:
            # creating a markdown file
            with open(f"{self.filename}.md", "w") as markdown_file:
                markdown_file.write(markdown)

            return "Markdown File is successfully stored"

        return "No pull requests found !!"
151151

152152

153153
if __name__ == "__main__":
    # CLI: positional user + organisation (each gated by a boolean flag),
    # optional markdown filename.
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--username", action="store_true")
    parser.add_argument("user", type=str, help="The name of the user to get the pull requests")
    parser.add_argument("-o", "--organization", action="store_true")
    parser.add_argument("organization_name", type=str, help="the organisation where user made the pull requests")
    parser.add_argument("-f", "--file", nargs="?")
    parser.add_argument("filename", type=str, nargs="?", help="filename to store the markdown table")
    args = parser.parse_args()

    # Fall back to a default output name when no filename was supplied.
    file_name = args.filename or "Markdown_file"

    # Both the -u and -o flags must be present alongside their positionals.
    if not (args.username and args.organization):
        print("Please pass atleast two arguments: '--username', '--organisation'")
    else:
        fetcher = Fetch_PullRequests(args.user, args.organization_name, file_name)
        print(fetcher.get_pullrequests())

0 commit comments

Comments
 (0)