Skip to content

Commit e64722a

Browse files
authored
Add files via upload (HarshCasper#1090)
1 parent b3dc951 commit e64722a

File tree

2 files changed

+86
-0
lines changed

2 files changed

+86
-0
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
from bs4 import BeautifulSoup
2+
import requests
3+
4+
# ANSI escape sequences wrapped around infobox field names when printing
BOLD = '\033[1m'  # switch the terminal to bold text
END = '\033[0m'  # reset terminal text attributes
6+
7+
def get_data(req):
    """
    Scrape the infobox from a fetched page and print its fields.

    Args:
        req: A response object whose ``text`` attribute holds the page HTML
            (in this script, the value returned by ``requests.get``).

    Returns:
        None. Every header/value pair found in the infobox is printed to
        the command line; if the page has no infobox, a short notice is
        printed instead.
    """
    # Initializing html parsing object of BeautifulSoup
    soup = BeautifulSoup(req.text, 'html.parser')

    # Finding the infobox class; find() yields None when the page has none
    info_table = soup.find('table', {'class': 'infobox'})
    if info_table is None:
        print('\nNo infobox found for this query!')
        return

    # Dictionary to store the collected information
    info_dict = {}

    # Gathering all the text fields within the infobox
    for tr in info_table.find_all('tr'):
        header = tr.find('th')
        value = tr.find('td')

        # Only rows holding both a header cell and a data cell form a
        # key/value pair; any other row is skipped.
        if header is None or value is None:
            continue

        info_dict[header.text] = value.text

    # Presenting the information in the command line
    for x, y in info_dict.items():
        print('\n{}{}{} : \n{}'.format(BOLD, x, END, y))
33+
34+
35+
def main():
    """
    Run the interactive scraper loop.

    Repeatedly asks the user for a search query, builds the matching
    Wikipedia article URL, fetches it and hands the response to
    ``get_data``. After each attempt the user is asked whether to continue;
    answering ``n`` ends the loop.
    """
    # Local import keeps this function self-contained
    from urllib.parse import quote

    while True:
        # Taking user input
        entry = input('\nEnter your search query: ')

        # Formatting the input suitable for the URL: collapse whitespace and
        # capitalize every word
        query = ' '.join(word.capitalize() for word in entry.split())

        if not query:
            # An empty title would request the bare /wiki/ path
            print('\nPlease enter a non-empty search query.')
            continue

        try:
            # Percent-encode the title so characters such as '?' or '#' are
            # not read as URL syntax; the timeout stops a stalled connection
            # from blocking the prompt forever.
            req = requests.get(
                'https://en.wikipedia.org/wiki/' + quote(query),
                timeout=10,
            )
        except requests.exceptions.RequestException:
            # Only genuine network/HTTP-layer failures are reported here
            print('\nCONNECTION ERROR! TRY AGAIN')
        else:
            if req.status_code == 200:
                try:
                    get_data(req)
                except AttributeError:
                    # BeautifulSoup lookups return None for missing
                    # elements; report that as a parsing problem rather
                    # than a connection error.
                    print('\nCould not read an infobox from this page!')
            else:
                print('\nInvalid URL!')

        cont = input('\nWould you like to continue?\nPress (y/n): ')
        if cont.strip().lower() == 'n':
            break
63+
64+
65+
# Start the interactive scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
67+

Python/Wikipedia Scraper/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Wikipedia infobox scraper
2+
3+
- The Wikipedia Infobox provides all the important factoids for any topic.
4+
- The Python script scrapes the Wikipedia Infobox for any query the user enters and displays it in a presentable form in the command line.
5+
6+
## Running the script:
7+
8+
```sh
9+
$ python pomodoro_timer.py
10+
```
11+
12+
## Working screenshots:
13+
14+
![Image](https://i.imgur.com/oS3ETNU.png)
15+
16+
17+
## Author:
18+
[Rohini Rao](https://github.com/RohiniRG)
19+

0 commit comments

Comments
 (0)