3 files changed, 71 insertions(+), 0 deletions(-)

check_link.py:
import requests


class check_link:
    def __init__(self):
        # status codes from 400 to 409 and from 501 to 503 count as bad
        self.bad_resp = list(range(400, 410)) + list(range(501, 504))
        self.badLinks = {}

    def __str__(self):
        # if this object is printed, show the dictionary of bad links
        return str(self.badLinks)

    def check(self, address):
        # check the link at an address; call this method with a
        # different address each time
        try:
            # a timeout keeps one dead server from stalling the whole run
            req = requests.get(address, timeout=10)
            resp = req.status_code
            if resp == 200:
                return True
            # it's possible to get HTTP 999: access denied,
            # which isn't an error
            if resp in self.bad_resp:
                # key by address so two links with the same status
                # code don't overwrite each other
                self.badLinks[address] = resp
            return resp
        except Exception as e:
            print("{} {}".format(e, address))
            return None
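Used on its own, the class collects failing links as it checks them; a minimal sketch of a session (the URL below is a placeholder, not from the original code):

from check_link import check_link

checker = check_link()
result = checker.check("https://example.com/")  # placeholder URL
if result is not True:
    print("problem:", result)
print(checker)  # __str__ shows the accumulated bad links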
deploy bot script:
# Deploy bot for Python
# TODO
# [X] check all links on page for 404
# [ ] check all images to see if they have an alt text
# [ ] compress html
# [ ] compress javascript
# [ ] compress css

import urllib.request
from multiprocessing import Process

from bs4 import BeautifulSoup

import check_link

# creates a global check_link object; each worker process gets its own
# copy, so bad links recorded in workers are not visible in the parent
check_link_obj = check_link.check_link()


def get_all_links(address):
    # get all links on a website, return a set of absolute URLs
    resp = urllib.request.urlopen(address)
    soup = BeautifulSoup(resp, 'html.parser')
    links = soup.find_all('a')
    return {link.get('href') for link in links
            if link.get('href') and link.get('href').startswith('http')}


def threader(website):
    # target function run in each worker process (multiprocessing
    # starts processes, not threads, despite the name)
    response = check_link_obj.check(website)
    if response is not True:
        print("HTTP " + str(response) + " " + website)


def main():
    # creates one worker process per link, starts them, then joins them
    website = input("What is the address of the website? ")
    processes = []
    for link in get_all_links(website):
        p = Process(target=threader, args=(link,))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()


if __name__ == "__main__":
    main()
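One limitation of the process-per-link design is that each worker gets its own copy of check_link_obj, so the badLinks dictionary in the parent never fills up. A sketch of one way around this, using a worker pool that returns each status to the parent; check_one, check_all, and the pool size of 8 are hypothetical choices, not part of the original bot:

from multiprocessing import Pool

import check_link

def check_one(address):
    # each worker builds its own checker and returns (address, status)
    checker = check_link.check_link()
    return address, checker.check(address)

def check_all(links):
    # fan the links out to a small pool and collect the non-OK statuses
    with Pool(processes=8) as pool:
        results = pool.map(check_one, list(links))
    return {addr: status for addr, status in results if status is not True}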