
Commit d54c8f3

First blood
0 parents  commit d54c8f3

File tree

3 files changed: +71 -0 lines changed


README.md

Whitespace-only changes.

check_link.py

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
import requests


class check_link:

    def __init__(self):
        # status codes treated as bad responses: 400-409 and 501-503
        self.bad_resp = list(range(400, 410)) + list(range(501, 504))
        self.badLinks = {}

    def __str__(self):
        # if this object is printed, print the dictionary of bad links
        return str(self.badLinks)

    def check(self, address):
        # check a single link; call this method with a different address each time
        try:
            req = requests.get(address)
            resp = req.status_code
            if resp == 200:
                return True
            else:
                # it's possible to get HTTP 999: access denied,
                # which isn't an error
                if resp in self.bad_resp:
                    # key by address so links sharing a status code aren't overwritten
                    self.badLinks[address] = resp
                return resp
        except Exception as e:
            print("{} {}".format(e, address))

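A quick usage sketch for the class above (the URL is only illustrative, and it assumes check_link.py is importable from the working directory): check() returns True for HTTP 200, the status code otherwise, and None if the request itself raised an exception.

from check_link import check_link

checker = check_link()
result = checker.check("https://example.com/")  # hypothetical URL
if result is not True:
    print(result)   # status code, or None if the request failed
    print(checker)  # dictionary of bad links collected so far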
deployBot.py

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
# Deploy bot for Python
# TODO
# [X] check all links on page for 404
# [ ] check all images to see if they have an alt text
# [ ] compress html
# [ ] compress javascript
# [ ] compress css

import check_link
from bs4 import BeautifulSoup
import urllib.request
from multiprocessing import Process

# create a global check_link object
check_link_obj = check_link.check_link()


def get_all_links(address):
    # get all links on a website, return a set of absolute http(s) URLs
    resp = urllib.request.urlopen(address)
    soup = BeautifulSoup(resp, 'html.parser')
    links = soup.find_all('a')
    return {link.get('href') for link in links
            if link.get('href') and link.get('href').startswith('http')}


def threader(website):
    # worker function; each link is checked in its own process
    response = check_link_obj.check(website)
    if response is not True:
        print("HTTP " + str(response) + " " + website)


def main():
    # spawn one process per link, start them all, then join them
    website = input("What is the address of the website? ")
    all_links = get_all_links(website)
    processes = []
    for i in all_links:
        try:
            p = Process(target=threader, args=(i,))
            p.start()
            processes.append(p)
        except Exception as e:
            print(e)
    for p in processes:
        p.join()


if __name__ == "__main__":
    main()

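One design note on the fan-out above: each link is checked in a separate multiprocessing.Process, so the children work on their own copies of check_link_obj and the badLinks dictionary in the parent never fills up; only the printed output comes back. A minimal sketch of the same loop using threads, which do share the parent's check_link_obj (main_threaded is a hypothetical helper, shown only to illustrate the trade-off):

from threading import Thread

def main_threaded():
    website = input("What is the address of the website? ")
    threads = [Thread(target=threader, args=(link,)) for link in get_all_links(website)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # with threads, the shared object accumulates results in this process
    print(check_link_obj)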
0 commit comments
