@@ -57,3 +57,40 @@ def wait_for_redirect(self, url = None, wait = 0.1, timeout=10):
57
57
return self .dryscrape_session .url ()
58
58
logger .error (f'Maybe { self .dryscrape_session .url ()} isn\' t a redirect URL?' )
59
59
raise Exception ('Timed out on the zipru redirect page.' )
60
+
61
def solve_captcha(self, img, width=1280, height=800):
    """Solve the captcha on the current page with OCR and submit the answer.

    The page is rendered to a temporary PNG, the screenshot is cropped to the
    bounding box of the first ``img`` element whose ``src`` contains
    "captcha", and the crop is passed to pytesseract. The recognised text is
    typed into the ``solve_string`` input and submitted with the
    ``button_submit`` button.

    Args:
        img: Not used by this method; retained so existing callers keep
            working.
        width: Viewport and screenshot width in pixels.
        height: Viewport and screenshot height in pixels.

    Returns:
        The result of ``self.bypass_threat_defense()`` if the redirect that
        follows the submission lands on a threat-defense URL; otherwise a
        dict mapping cookie names to values for every session cookie whose
        string contains ``domain=zipru.to``.
    """
    session = self.dryscrape_session

    # Take a screenshot of the page. mkstemp creates the file atomically,
    # unlike the deprecated, race-prone mktemp; only the path is needed, so
    # the descriptor is closed right away.
    session.set_viewport_size(width, height)
    fd, filename = tempfile.mkstemp(suffix='.png')
    os.close(fd)
    try:
        session.render(filename, width, height)

        # Inject javascript to find the bounds of the captcha.
        js = 'document.querySelector("img[src *= captcha]").getBoundingClientRect()'
        rect = session.eval_script(js)
        box = tuple(int(rect[edge]) for edge in ('left', 'top', 'right', 'bottom'))

        # Image.open is lazy, so force the cropped pixels into memory while
        # the file is still open; afterwards the file can be closed and
        # removed safely on every platform.
        with Image.open(filename) as image:
            captcha_image = image.crop(box)
            captcha_image.load()
    finally:
        # Remove the screenshot even if rendering or cropping failed.
        os.unlink(filename)

    # Solve the captcha in the cropped screenshot.
    captcha = pytesseract.image_to_string(captcha_image)
    logger.debug(f'Solved the Zipru captcha: "{ captcha } "')

    # Submit the captcha.
    solve_field = session.xpath('//input[@id = "solve_string"]')[0]
    solve_field.set(captcha)
    button = session.xpath('//button[@id = "button_submit"]')[0]
    url = session.url()
    button.click()

    # Try again if we are redirected to a threat defense URL.
    if self.is_threat_defense_url(self.wait_for_redirect(url)):
        return self.bypass_threat_defense()

    # Otherwise return the cookies as a dict. Only the first '=' separates
    # the name from the value; the value itself may contain '=' (e.g.
    # base64 padding), which a plain split('=') would choke on.
    cookies = {}
    for cookie_string in session.cookies():
        if 'domain=zipru.to' in cookie_string:
            key, _, value = cookie_string.split(';')[0].partition('=')
            cookies[key] = value
    return cookies
0 commit comments