Skip to content

Commit d6613cf

Browse files
authored
Fix GitHub fetch contribution script (HarshCasper#1237)
1 parent 201ae63 commit d6613cf

File tree

1 file changed

+161
-161
lines changed

1 file changed

+161
-161
lines changed
Lines changed: 161 additions & 161 deletions
Original file line numberDiff line numberDiff line change
@@ -1,170 +1,170 @@
import argparse

import pandas as pd
import requests
from bs4 import BeautifulSoup
from lxml import html
from tabulate import tabulate

89

910
class Fetch_PullRequests:
    """
    Fetches the pull requests of a user in a organization.

    Scrapes github.com HTML pages (no API token needed): first the
    organisation's repository listing, then each repository's pull-request
    search page filtered by author.
    """

    def __init__(self, username, organization, filename):
        """
        :param username: github user whose pull requests are collected
        :param organization: Organisation name
        :param filename: output filename (without the ``.md`` extension), it's optional
        """
        # Paginated repository listing for the organisation.
        self.ORG_URL = f"https://github.com/orgs/{organization}/repositories"
        # Base URL used to build per-repository pull-request pages.
        self.URL = f"https://github.com/{organization}"
        self.organization = organization
        self.username = username
        self.filename = filename

    def _list_of_repositories(self):
        """
        Function lists the repositories of the organisation.

        Returns
        -------
        list
            lists the repositories

        """
        page = requests.get(self.ORG_URL)
        tree = html.fromstring(page.content)
        # The paginator's <em> carries the total page count; absent when
        # everything fits on one page.
        number_of_pages = tree.xpath('//*[@id="org-repositories"]/div/div/div[2]/div/em/@data-total-pages')
        Repositories = []
        if len(number_of_pages) == 0:
            # Single page of repositories.
            Repositories.extend(tree.xpath(
                '//*[contains(concat( " ", @class, " " ), concat( " ", "wb-break-all", " " ))]//*[contains(concat( " ", @class, " " ), concat( " ", "d-inline-block", " " ))]/text()'))
        else:
            # Walk every listing page.
            for number in range(1, int(number_of_pages[0]) + 1):
                page_ = requests.get(self.ORG_URL + f"?page={number}")
                tree = html.fromstring(page_.content)
                Repositories.extend(tree.xpath(
                    '//*[contains(concat( " ", @class, " " ), concat( " ", "wb-break-all", " " ))]//*[contains(concat( " ", @class, " " ), concat( " ", "d-inline-block", " " ))]/text()'))

        # Deduplicate and strip surrounding whitespace from the scraped names.
        return list(pd.Series(list(set(Repositories))).str.strip().values)

    def _extract_pullrequests(self, repo):
        """
        Function fetches the pull request of a repo.

        Parameters
        ----------
        repo: str
            repository name

        Returns
        -------
        pandas dataframe
            dataframe consists of columns - "Title to PR", "Link of PR", "Status(Merged/Closed/Open)"

        """
        # initializing the lists to store the title, link and status of the pull request
        Title = []
        Link = []
        Status = []
        URL = self.URL + f"/{repo}/pulls?q=is%3Apr+author%3A{self.username}"
        page = requests.get(URL)
        tree = html.fromstring(page.content)
        # to determine the number of pages
        number_of_pages = tree.xpath('//*[@id="repo-content-pjax-container"]/div/div[6]/div/em/@data-total-pages')

        if len(number_of_pages) == 0:
            # Title may contain text in <code> tags. So, to handle it we use
            # BeautifulSoup (tag.text flattens nested markup) instead of the
            # xpath text() extraction used for links/status.
            soup = BeautifulSoup(page.text, 'html.parser')
            for tag in soup.find_all('a', attrs={'class': 'markdown-title'}):
                Title.append(tag.text.strip())
            Link.extend(
                tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "markdown-title", " " ))]/@href'))
            Status.extend(tree.xpath(
                '//*[contains(concat( " ", @class, " " ), concat( " ", "pl-3", " " ))]/span/@aria-label'))

        else:
            for number in range(1, int(number_of_pages[0]) + 1):
                URL = self.URL + f"/{repo}/pulls?page={number}&q=is%3Apr+author%3A{self.username}"
                page = requests.get(URL)
                tree = html.fromstring(page.content)

                soup = BeautifulSoup(page.text, 'html.parser')

                for tag in soup.find_all('a', attrs={'class': 'markdown-title'}):
                    Title.append(tag.text.strip())
                Link.extend(tree.xpath(
                    '//*[contains(concat( " ", @class, " " ), concat( " ", "markdown-title", " " ))]/@href'))
                Status.extend(tree.xpath(
                    '//*[contains(concat( " ", @class, " " ), concat( " ", "pl-3", " " ))]/span/@aria-label'))

        Data = {
            "Title to PR": Title,
            "Link of PR": Link,
            "Status(Merged/Closed/Open)": Status
        }

        # creating a dataframe with the above dictionary
        dataframe = pd.DataFrame.from_dict(Data)

        # make necessary changes to the columns of dataframe before returning it
        # aria-label reads e.g. "Merged pull request" -> keep only the status word
        dataframe['Status(Merged/Closed/Open)'] = dataframe['Status(Merged/Closed/Open)'].astype(str).str.replace(
            " pull request",
            "", regex=False)
        if dataframe['Link of PR'].dtype != 'O':
            # empty column defaults to a numeric dtype; coerce so the
            # string concatenation below does not raise
            dataframe['Link of PR'] = dataframe['Link of PR'].astype(str)
        # hrefs are site-relative; turn them into absolute URLs
        dataframe['Link of PR'] = 'https://github.com' + dataframe['Link of PR']

        return dataframe

    def get_pullrequests(self):
        """
        Function pass the repo parameter to the "_extract_pullrequests" to fetch the pull requests of the particular repo.

        Returns
        -------
        str
            return str saying that the file is stored if markdown is not empty.

        """
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # collect the per-repo frames and concatenate once instead.
        frames = [self._extract_pullrequests(repo) for repo in self._list_of_repositories()]
        dataframe = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

        markdown = dataframe.to_markdown()

        if len(markdown) > 0:
            # creating a markdown file
            with open(f"{self.filename}.md", "w") as markdown_file:
                markdown_file.write(markdown)

            return "Markdown File is successfully stored"

        return "No pull requests found !!"
151151

152152

153153
if __name__ == "__main__":
    # CLI: positional user + organisation (each gated by a boolean flag),
    # optional markdown filename.
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--username", action="store_true")
    parser.add_argument("user", type=str, help="The name of the user to get the pull requests")
    parser.add_argument("-o", "--organization", action="store_true")
    parser.add_argument("organization_name", type=str, help="the organisation where user made the pull requests")
    parser.add_argument("-f", "--file", nargs="?")
    parser.add_argument("filename", type=str, nargs="?", help="filename to store the markdown table")
    args = parser.parse_args()

    # Fall back to a default output name when no filename was supplied.
    file_name = args.filename or "Markdown_file"

    # Both the -u and -o flags must be present alongside their positionals.
    if not (args.username and args.organization):
        print("Please pass atleast two arguments: '--username', '--organisation'")
    else:
        fetcher = Fetch_PullRequests(args.user, args.organization_name, file_name)
        print(fetcher.get_pullrequests())

0 commit comments

Comments
 (0)