|
import json
from urllib.parse import quote

import requests
from bs4 import BeautifulSoup
| 4 | + |
| 5 | + |
# URL template for a tag's question list; {filter} fills the "tab" query parameter.
fmt = "https://stackoverflow.com/questions/tagged/{tag}?tab={filter}&pagesize=15"
# Menu shown to the user; the last word of each entry is the value sent as "tab".
filters = [
    "1. Newest",
    "2. Active",
    "3. Bounties",
    "4. Unanswered",
    "5. Frequent",
    "6. Votes",
]

# Surrounding whitespace is never part of a tag and would only corrupt the URL.
tag = input("enter any question tag (python, java)\n").strip()
print("\n".join(filters))

# The menu is numbered from 1 while the list is indexed from 0. Anything that
# is not one of the listed numbers (including non-numeric input) falls back
# to "Votes" instead of crashing or silently wrapping around the list.
try:
    choice = int(input("enter the filter number (1, 3, 5)\n"))
except ValueError:
    choice = 0

if 1 <= choice <= len(filters):
    selected_filter = filters[choice - 1].split(" ")[-1]
else:
    selected_filter = "Votes"

# generate dynamic URL with user preferences
# The tag is percent-encoded so names such as "c#" are not cut off at the "#".
URL = fmt.format(tag=quote(tag, safe=""), filter=selected_filter)

print("generated URL ", URL)
# A timeout keeps the script from hanging forever on an unresponsive server.
content = requests.get(URL, timeout=10).content

soup = BeautifulSoup(content, "lxml")
| 32 | + |
def is_question(tag):
    """Return True when *tag* is a question-summary element.

    An element qualifies when its ``id`` attribute is a string starting
    with ``"question-summary-"``. Elements without an ``id`` yield False.

    Args:
        tag: Any object exposing ``get(name)`` for attribute lookup, such
            as the elements passed to a ``find_all`` filter function.

    Returns:
        bool: Whether the element's id marks it as a question summary.
    """
    element_id = tag.get("id")
    # Checked explicitly rather than with a bare ``except`` so that real
    # errors (and KeyboardInterrupt) are not silently swallowed.
    return isinstance(element_id, str) and element_id.startswith("question-summary-")
| 39 | + |
| 40 | + |
def _clean_text(node):
    """Return the stripped text of *node*, or None when the element is missing."""
    return node.get_text().strip() if node is not None else None


# Collect every element that is_question() accepts.
questions = soup.find_all(is_question)
question_data = []
if questions:
    # extract question data like votes, title, link and date
    # A summary lacking one of the expected elements gets None for that field
    # instead of aborting the whole export with an AttributeError.
    for question in questions:
        votes_node = question.find(class_="s-post-summary--stats-item-number")
        title_node = question.find(class_="s-post-summary--content-title")
        anchor = title_node.find("a") if title_node is not None else None
        href = anchor.get("href") if anchor is not None else None
        time_node = question.find(class_="s-user-card--time")
        date_node = time_node.span if time_node is not None else None

        question_data.append(
            {
                "votes": _clean_text(votes_node),
                "title": _clean_text(title_node),
                "link": "https://stackoverflow.com" + href if href else None,
                "date": _clean_text(date_node),
            }
        )

    # Explicit encoding so the output does not depend on the platform default.
    with open(f"questions-{tag}.json", "w", encoding="utf-8") as f:
        json.dump(question_data, f)

    print("file exported")

else:
    print(URL)
    print("looks like there are no questions matching your tag ", tag)
0 commit comments