|
| 1 | +import csv |
| 2 | +from pathlib import Path |
| 3 | +from urllib.request import urlopen |
| 4 | + |
| 5 | +from bs4 import BeautifulSoup, Tag |
| 6 | + |
| 7 | + |
def parse_place(td_place: Tag) -> tuple[str, str]:
    """Extract the place name and event URL from a table cell.

    Args:
        td_place: The ``<td>`` element holding the place name, normally
            wrapped in an ``<a>`` element that links to the event page.

    Returns:
        A ``(place, url)`` tuple. ``url`` is an empty string when the cell
        contains no ``<a>`` element or the anchor has no ``href`` attribute.

    >>> raw_td = '<td><a href="https://pyconjp.connpass.com/event/33014/" target="_blank">京都</a></td>'
    >>> td_place = BeautifulSoup(raw_td, "html.parser").td
    >>> parse_place(td_place)
    ('京都', 'https://pyconjp.connpass.com/event/33014/')
    >>> td_plain = BeautifulSoup("<td>京都</td>", "html.parser").td
    >>> parse_place(td_plain)
    ('京都', '')
    """
    anchor = td_place.a
    # ``td_place.a`` is None when the cell has no link; subscripting it
    # would raise TypeError, so fall back to an empty URL instead.
    url = anchor.get("href", "") if anchor is not None else ""
    return td_place.text, url
| 16 | + |
| 17 | + |
def parse_count(td_count: Tag) -> str:
    """Return the text content of the participant-count cell unchanged.

    Args:
        td_count: The ``<td>`` element whose text describes the number of
            participants.

    Returns:
        The cell's text exactly as it appears in the markup.

    >>> raw_td = '<td>一般参加11人、学生1人</td>'
    >>> td_count = BeautifulSoup(raw_td, "html.parser").td
    >>> parse_count(td_count)
    '一般参加11人、学生1人'
    """
    count_text = td_count.get_text()
    return count_text
| 26 | + |
| 27 | + |
if __name__ == "__main__":
    # Download the pycamp landing page, take the last <table> in its body,
    # and write every visible row as (place, event URL, participant count)
    # to ../source/sections/participants_count.csv relative to this file's
    # directory.
    tools_dir = Path(__file__).parent
    output_dir = tools_dir.parent / "source" / "sections"
    csv_path = output_dir / "participants_count.csv"

    with urlopen("https://peraichi.com/landing_pages/view/pycamp") as res:
        raw_html = res.read()

    soup = BeautifulSoup(raw_html, "html.parser")
    body = soup.body
    tables = body.find_all("table")
    # The last table in the page body is used as the participants table.
    participants_table = tables[-1]
    rows = participants_table.tbody.find_all("tr")

    outputs = [["開催地", "URL", "参加人数"]]
    for row in rows:
        # Skip rows that are hidden in the table; processing them would
        # produce duplicate entries. ``row.get`` treats a <tr> without a
        # class attribute as visible instead of raising KeyError.
        if "pera1-ghost" in row.get("class", []):
            continue
        place_tag, count_tag = row.find_all("td")
        place, event_url = parse_place(place_tag)
        count = parse_count(count_tag)
        outputs.append([place, event_url, count])

    # newline="" lets the csv module control line endings itself; without
    # it, platforms that translate "\n" would emit "\r\r\n" row terminators.
    with open(csv_path, "w", encoding="utf8", newline="") as f:
        writer = csv.writer(f)
        writer.writerows(outputs)
0 commit comments