|
| 1 | +''' |
| 2 | +Small scraper built as a follow-along of EngineerMan's live stream. I decided |
| 3 | +to use the rich module to experiment with displaying data to the terminal in a |
| 4 | +'nicer' to look at way. |
| 5 | +''' |
| 6 | + |
| 7 | +import requests |
| 8 | +from bs4 import BeautifulSoup |
| 9 | +from rich import box |
| 10 | +from rich.table import Table |
| 11 | +from rich.console import Console |
| 12 | + |
url = 'https://yardsalesearch.com/garage-sales.html?zip=90210'

# Console instance used for the status spinner and for printing the table
console = Console()

# Build the results table: simple box drawing with a bold header row
table = Table(box=box.SIMPLE, show_header=True, header_style='bold')

# Location columns keep the default styling
for heading in ('Address', 'City', 'State', 'Zip Code'):
    table.add_column(heading)

# Coordinate and date columns are dimmed and right-aligned
for heading in ('Latitude', 'Longitude', 'Start Date', 'End Date'):
    table.add_column(heading, style='dim', justify="right")
| 32 | + |
def _text_of(tag):
    """Return the text of ``tag``, or an empty string when the tag is missing."""
    return tag.text if tag is not None else ''


def _content_of(tag):
    """Return the ``content`` attribute of ``tag``, or '' when either is absent."""
    return tag.get('content', '') if tag is not None else ''


# The status method adds a spinner on the screen while data is being loaded
with console.status('Fetching data...'):
    # A timeout keeps the spinner from hanging forever on a stalled
    # connection, and raise_for_status() stops us from parsing an HTTP error
    # page as if it were the listing page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    # Store the content as a 'Soup' object for parsing
    soup = BeautifulSoup(response.text, 'html.parser')

    # Identify in the website the content we're interested in and target it.
    # NOTE: a multi-word class string is matched against the exact value of
    # the class attribute, so only divs declared as "event row featured"
    # are picked up.
    for element in soup.find_all('div', {'class': 'event row featured'}):
        # Look the region spans up once; the first is used as the state and
        # the second as the zip code. A listing that lacks either one gets
        # an empty cell instead of aborting the whole run with IndexError.
        regions = element.find_all('span', {'itemprop': 'addressRegion'})
        state = regions[0].text if regions else ''
        zip_code = regions[1].text if len(regions) > 1 else ''

        # Missing tags or attributes become empty cells rather than raising
        # AttributeError/TypeError/KeyError on a single malformed listing.
        table.add_row(
            _text_of(element.find('span', {'itemprop': 'streetAddress'})),
            _text_of(element.find('span', {'itemprop': 'addressLocality'})),
            state,
            zip_code,
            _content_of(element.find('meta', {'itemprop': 'latitude'})),
            _content_of(element.find('meta', {'itemprop': 'longitude'})),
            _content_of(element.find('meta', {'itemprop': 'startDate'})),
            _content_of(element.find('meta', {'itemprop': 'endDate'})),
        )


console.print(table)
0 commit comments