Skip to content

Commit 19ca115

Browse files
committed
add scraper.py
1 parent f178a62 commit 19ca115

File tree

2 files changed

+54
-1
lines changed

2 files changed

+54
-1
lines changed

oh_node.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
Usage: node_way.py ROOT_DIR [-v verbose][-t top extensions]
88
99
TODO: ignore binary type formats (images, executables, etc)
10+
TODO: include rich module for better looking output (and cleaner code too)
1011
'''
1112

1213
from pathlib import Path
@@ -16,7 +17,6 @@
1617
import os
1718
import re
1819

19-
2020
def main():
2121
args = parse_arguments()
2222

scraper.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
'''
Small scraper built as a follow-along of EngineerMan's live stream. I decided
to use the rich module to experiment with displaying data to the terminal in a
'nicer' to look at way.
'''

import requests
from bs4 import BeautifulSoup
from rich import box
from rich.table import Table
from rich.console import Console

# Base listing page; the zip code is passed as a query parameter so it can
# be changed in one place (the original hard-coded it into the URL string).
BASE_URL = 'https://yardsalesearch.com/garage-sales.html'
DEFAULT_ZIP = '90210'

# Seconds to wait for the HTTP response. The original call had no timeout,
# so a stalled server would hang the script forever.
REQUEST_TIMEOUT = 30


def fetch_soup(zip_code=DEFAULT_ZIP):
    '''Fetch the garage-sale listing page for *zip_code* and parse it.

    Returns a BeautifulSoup document. Raises requests.HTTPError on a
    non-2xx response instead of silently rendering an empty table, and
    requests.Timeout if the server does not answer in REQUEST_TIMEOUT s.
    '''
    response = requests.get(
        BASE_URL,
        params={'zip': zip_code},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')


def build_table(soup):
    '''Build a rich Table of the sale events found in *soup*.'''
    # Initialize the table and headers
    table = Table(
        box=box.SIMPLE,
        show_header=True,
        header_style='bold',
    )
    table.add_column('Address')
    table.add_column('City')
    table.add_column('State')
    table.add_column('Zip Code')
    table.add_column('Latitude', style='dim', justify="right")
    table.add_column('Longitude', style='dim', justify="right")
    table.add_column('Start Date', style='dim', justify="right")
    table.add_column('End Date', style='dim', justify="right")

    # Identify in the website the content we're interested in and target it.
    # NOTE(review): the site marks both state and zip with
    # itemprop="addressRegion" (indices 0 and 1) — looks odd, but matches
    # the page markup this was written against; verify if the site changes.
    for element in soup.find_all('div', {'class': 'event row featured'}):
        table.add_row(
            element.find('span', {'itemprop': 'streetAddress'}).text,
            element.find('span', {'itemprop': 'addressLocality'}).text,
            element.find_all('span', {'itemprop': 'addressRegion'})[0].text,
            element.find_all('span', {'itemprop': 'addressRegion'})[1].text,
            element.find('meta', {'itemprop': 'latitude'})['content'],
            element.find('meta', {'itemprop': 'longitude'})['content'],
            element.find('meta', {'itemprop': 'startDate'})['content'],
            element.find('meta', {'itemprop': 'endDate'})['content'],
        )
    return table


def main():
    '''Fetch the listings and print them as a table to the terminal.'''
    # Console handles all terminal output (spinner + final table).
    console = Console()
    # The status method adds a spinner on the screen while data is being loaded
    with console.status('Fetching data...'):
        soup = fetch_soup()
        table = build_table(soup)
    console.print(table)


# Guard the entry point so importing this module no longer fires a network
# request as a side effect.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)