Skip to content

Commit 3d4e7ff

Browse files
committed
Add the solve_captcha method.
1 parent 178e0a1 commit 3d4e7ff

File tree

1 file changed

+37
-0
lines changed

1 file changed

+37
-0
lines changed

zipru_scraper/middlewares.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,40 @@ def wait_for_redirect(self, url = None, wait = 0.1, timeout=10):
5757
return self.dryscrape_session.url()
5858
logger.error(f'Maybe {self.dryscrape_session.url()} isn\'t a redirect URL?')
5959
raise Exception('Timed out on the zipru redirect page.')
60+
61+
def solve_captcha(self, img, width=1280, height=800):
    """Solve the captcha on the current page and submit the answer.

    Renders a screenshot of the dryscrape session, crops it to the
    bounding box of the first ``<img>`` whose ``src`` contains
    ``captcha``, runs OCR on the crop, types the result into the
    ``solve_string`` input and clicks the ``button_submit`` button.

    Args:
        img: Unused by this method; kept so existing callers keep working.
        width: Viewport and screenshot width in pixels.
        height: Viewport and screenshot height in pixels.

    Returns:
        If the page redirects to a threat defense URL again, whatever
        ``self.bypass_threat_defense()`` returns. Otherwise a dict mapping
        cookie names to values for every session cookie whose string
        contains ``domain=zipru.to``.

    Raises:
        IndexError: presumably, if the captcha input or the submit button
            is not present on the page (the first XPath match is indexed
            directly) -- confirm against dryscrape's ``xpath`` return type.
    """
    # take a screenshot of the page
    self.dryscrape_session.set_viewport_size(width, height)
    # mkstemp creates the file atomically; the deprecated mktemp only
    # returns a name, leaving a window for another process to claim it.
    fd, filename = tempfile.mkstemp(suffix='.png')
    os.close(fd)  # render() writes by file name, so the descriptor is not needed
    try:
        self.dryscrape_session.render(filename, width, height)

        # inject javascript to find the bounds of the captcha
        js = 'document.querySelector("img[src *= captcha]").getBoundingClientRect()'
        rect = self.dryscrape_session.eval_script(js)
        box = (int(rect['left']), int(rect['top']), int(rect['right']), int(rect['bottom']))

        # solve the captcha in the screenshot; OCR runs while the file is
        # still open because Image.open reads pixel data lazily
        with Image.open(filename) as image:
            captcha_image = image.crop(box)
            # OCR output may carry surrounding whitespace/newlines that are
            # not part of the captcha text
            captcha = pytesseract.image_to_string(captcha_image).strip()
    finally:
        # always remove the temporary screenshot, even if a step above failed
        os.unlink(filename)
    logger.debug(f'Solved the Zipru captcha: "{captcha}"')

    # submit the captcha
    captcha_input = self.dryscrape_session.xpath('//input[@id = "solve_string"]')[0]
    captcha_input.set(captcha)
    button = self.dryscrape_session.xpath('//button[@id = "button_submit"]')[0]
    # remember the current URL so wait_for_redirect can detect the change
    url = self.dryscrape_session.url()
    button.click()

    # try again if we are redirected to a threat defense URL
    if self.is_threat_defense_url(self.wait_for_redirect(url)):
        return self.bypass_threat_defense()

    # otherwise return the cookies as a dict
    cookies = {}
    for cookie_string in self.dryscrape_session.cookies():
        if 'domain=zipru.to' in cookie_string:
            # split only on the first '=' so values that themselves contain
            # '=' (e.g. base64 padding) do not break the unpacking
            key, value = cookie_string.split(';')[0].split('=', 1)
            cookies[key] = value
    return cookies

0 commit comments

Comments
 (0)