Commit b3dc951

Google Playstore Scraper (HarshCasper#1092)

* Add files via upload
* Update PlaystoreScraper.py

1 parent 2cb12b9 commit b3dc951

File tree

3 files changed: +167 −0 lines changed
Python/PlaystoreScraper/PlaystoreScraper.py

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
import csv
import time

from selenium import webdriver


def write_to_csv(row_array):
    """
    Stores the scraped info in a .csv file.
    """
    # Headings for the first row of the file
    header_list = ['URL', 'NAME', 'RATING', 'REVIEW', 'INSTALLS', 'CURRENT VERSION',
                   'LAST UPDATED', 'COMPANY', 'CONTACT']
    file_name = input('\nEnter the name of file to store the info: ')

    # Adding the info into the rows of the file;
    # newline='' prevents csv.writer from emitting blank lines on Windows
    with open(file_name + '.csv', 'a', newline='', encoding='utf-8') as csv_f:
        csv_pointer = csv.writer(csv_f, delimiter=',')
        csv_pointer.writerow(header_list)
        csv_pointer.writerows(row_array)

    print(f'Done! Check your directory for {file_name}.csv file!')


def scraper():
    """
    Uses Selenium to scrape the required information,
    which is later stored in a .csv file.
    """
    driver = webdriver.Chrome()
    while True:
        # Take user input for the search query
        query = input("\nEnter search query: ")

        # Open the search results page for the query
        driver.get(f'https://play.google.com/store/search?q={query}&c=apps')

        print(f'\nCollecting information for {query}...\n')

        # Give the page time to load
        time.sleep(5)

        # Record the initial page height so we can tell when scrolling
        # stops loading new results
        last_height = driver.execute_script("return document.body.scrollHeight")
        time.sleep(5)

        # Scroll until the end of the page
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(5)

            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Store the URLs of all the apps shown throughout the page
        store_urls = []
        elems = driver.find_elements_by_xpath("//a[@href]")
        for elem in elems:
            if "details?id" in elem.get_attribute("href"):
                store_urls.append(elem.get_attribute("href"))

        # Remove duplicates while preserving order
        store_urls = list(dict.fromkeys(store_urls))

        all_info = []
        # Visit each URL to gather the details we require
        for every in store_urls:
            try:
                each_info = []
                # Open the app's detail page
                driver.get(every)
                each_info.append(every)
                time.sleep(3)

                # Get the app name
                header1 = driver.find_element_by_tag_name("h1")
                each_info.append(header1.text)

                # Get the star rating
                star = driver.find_element_by_class_name("BHMmbe")
                each_info.append(star.text)

                # Get the number of reviews
                comments = driver.find_element_by_class_name("EymY4b")
                each_info.append(comments.text.split()[0])

                # Get the footer information: installs, version, email, etc.
                # Only every other cell holds a value we want
                stat_info_table = driver.find_elements_by_class_name("htlgb")
                stats = [cell.text for x, cell in enumerate(stat_info_table)
                         if x % 2 == 0]

                stat_header = driver.find_elements_by_class_name("BgcNfc")
                for x in range(len(stat_header)):
                    if stat_header[x].text == "Installs":
                        each_info.append(stats[x])

                    if stat_header[x].text == "Current Version":
                        each_info.append(stats[x])

                    if stat_header[x].text == "Updated":
                        each_info.append(stats[x])

                    if stat_header[x].text == "Offered By":
                        each_info.append(stats[x])

                    if stat_header[x].text == "Developer":
                        # The developer cell spans several lines;
                        # keep only the one holding an email address
                        for y in stats[x].split("\n"):
                            if "@" in y:
                                each_info.append(y)
                                break

                all_info.append(each_info)

            except Exception:
                # Skip apps whose pages are missing any of the elements above
                continue

        print('\nAll info collected successfully!!\n')
        print('\nDONE!\n')

        # Write the collected info to a csv file
        write_to_csv(all_info)

        ans = input('Press (y) to continue or any other key to exit: ').lower()
        if ans == 'y':
            continue
        else:
            print('Exiting..')
            break

    # Close the browser once the user exits
    driver.quit()


if __name__ == '__main__':
    scraper()

Python/PlaystoreScraper/README.md

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
# Google Playstore Scraper

- With the help of the Selenium library, the Python script scrapes the required information: the app URL, name, rating, number of reviews, installs, developer info, etc.
- All of the collected information is stored in a `.csv` file.

## Setup instructions

- The requirements can be installed as follows:

```shell
$ pip install -r requirements.txt
```
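
- If you prefer an isolated environment, a minimal virtual-environment sketch (this assumes Python 3 and a POSIX shell; the directory name `venv` is arbitrary):

```shell
$ python3 -m venv venv        # create the environment
$ source venv/bin/activate    # activate it
$ pip install -r requirements.txt
```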

## Running the script

- In your terminal, create and activate a virtual environment with the given requirements (see the sketch above) and type the following command:

```shell
$ python PlaystoreScraper
```
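
- For reference, a session might look like the following (the query `notes` and the file name `notes_apps` are made-up example inputs; the prompts come from the script):

```shell
$ python PlaystoreScraper

Enter search query: notes

Collecting information for notes...

All info collected successfully!!

DONE!

Enter the name of file to store the info: notes_apps
Done! Check your directory for notes_apps.csv file!
Press (y) to continue or any other key to exit: n
Exiting..
```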

## Working screenshots

![Image](https://i.imgur.com/vRGfa3S.png)

## Author

[Rohini Rao](https://www.github.com/RohiniRG)
Python/PlaystoreScraper/requirements.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
selenium==3.141.0
