|
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
| 3 | + |
# ANSI escape sequences used to highlight field names in terminal output.
BOLD = '\033[1m'  # switch bold text on
END = '\033[0m'   # reset all text attributes
| 6 | + |
def get_data(req):
    """
    Scrape the infobox table from a fetched page and print its fields.

    The response body is parsed with BeautifulSoup's built-in 'html.parser'.
    Each infobox row that contains both a header cell (<th>) and a data
    cell (<td>) contributes one "header: value" entry; rows missing either
    cell are skipped. If several rows share the same header text, the last
    one wins.

    Args:
        req: Response object whose ``text`` attribute holds the page HTML
            (main() passes the result of ``requests.get``).

    Returns:
        None. The collected fields, or a notice that the page has no
        infobox, are printed to standard output.
    """
    # Initializing html parsing object of BeautifulSoup
    soup = BeautifulSoup(req.text, 'html.parser')

    # find() returns None when no table carries the 'infobox' class, so
    # report that explicitly instead of failing on None.find_all().
    info_table = soup.find('table', {'class': 'infobox'})
    if info_table is None:
        print('\nNo infobox found on this page.')
        return

    # Gathering all the text fields within the infobox
    info_dict = {}
    for tr in info_table.find_all('tr'):
        header = tr.find('th')
        value = tr.find('td')
        # Section titles and image rows lack one of the two cells.
        if header is None or value is None:
            continue
        info_dict[header.text] = value.text

    # Presenting the information in the command line
    for label, text in info_dict.items():
        print('\n{}{}{} : \n{}'.format(BOLD, label, END, text))
| 33 | + |
| 34 | + |
def main():
    """
    Run the interactive search loop.

    Repeatedly asks for a search query, builds the matching Wikipedia
    article URL, fetches it and hands a successful (HTTP 200) response to
    get_data() for display. After each lookup the user is asked whether
    to continue; answering 'n' or 'no' (any case) ends the loop.
    """
    while True:
        # Taking user input
        entry = input('\nEnter your search query: ')

        # Formatting the input suitable for the URL: collapse whitespace
        # and capitalize every word.
        query = ' '.join(word.capitalize() for word in entry.split())

        # An empty query would request the bare '/wiki/' URL, not an article.
        if not query:
            print('\nPlease enter a search query.')
            continue

        # Percent-encode the title so characters such as '?' or '#' are
        # sent as part of the article name instead of being read as URL
        # syntax.
        url = 'https://en.wikipedia.org/wiki/' + quote(query)

        try:
            # The timeout keeps the prompt from hanging forever on a
            # stalled connection.
            req = requests.get(url, timeout=10)
        except requests.exceptions.RequestException:
            print('\nCONNECTION ERROR! TRY AGAIN')
        else:
            if req.status_code == 200:
                try:
                    get_data(req)
                except Exception as err:
                    # Keep the loop alive, but do not mislabel a parsing
                    # or display problem as a connection error.
                    print('\nCould not display the page data: {}'.format(err))
            else:
                print('\nInvalid URL!')

        cont = input('\nWould you like to continue?\nPress (y/n): ')
        if cont.strip().lower() in ('n', 'no'):
            break
| 63 | + |
| 64 | + |
# Start the interactive prompt only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()
| 67 | + |
0 commit comments