mirror of
https://github.com/petarov/google-android-app-ids.git
synced 2026-04-17 15:36:52 -04:00
156 lines
5.3 KiB
Python
Executable File
156 lines
5.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# coding: utf-8
|
|
# pylint: disable=C0111
|
|
# pylint: disable=C0103
|
|
# pylint: disable=C0330
|
|
|
|
from __future__ import print_function
|
|
from datetime import datetime
|
|
import os
|
|
import sys
|
|
import traceback
|
|
import csv
|
|
import json
|
|
from operator import itemgetter
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
import multiprocessing
|
|
from multiprocessing.pool import ThreadPool
|
|
|
|
# Play Store region (`gl`) and language (`hl`) query parameters.
LOCATION = "us"
LANG = "en"

# Input files.
SRC_CSV_FILE = "app-ids.csv"
SRC_MARKDOWN_FILE = "template.README.md"

# Placeholders substituted in the README template.
SRC_APPS_PLACEHOLDER = '%%APPS%%'
SRC_APPSCOUNT_PLACEHOLDER = '%%APPS_COUNT%%'
SRC_TIMESTAMP_PLACEHOLDER = '%%BUILD_TIMESTAMP%%'
SRC_VERSION_PLACEHOLDER = '%%VERSION%%'

# Generated output files.
DIST_README = 'README.md'
DIST_JSON = 'google-app-ids.json'
DIST_CSV = 'google-app-ids.csv'

# Markdown link to an app's store page: {0}=title, {1}=package id,
# {2}=language, {3}=region.
APP_LINK_PLACEHOLDER = (
    "[{0}](https://play.google.com/store/apps/details?id={1}&hl={2}&gl={3})"
)
|
|
|
|
def csv_parse(csv_path):
    """Read the app package ids from the source CSV file.

    Only the first column of each row is used. The first row is treated as
    a header and dropped; blank lines are skipped.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        A list of single-element lists, ``[[package_id], ...]``, in file
        order.

    Raises:
        Exception: If ``csv_path`` does not exist.
    """
    print('Parsing apps from CSV file...')
    if not os.path.exists(csv_path):
        raise Exception('{} source file could not be found!'.format(csv_path))

    # newline='' lets the csv module do its own line-ending handling.
    with open(csv_path, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        # csv.reader yields [] for a blank line; indexing it would raise
        # IndexError, so such rows are skipped.
        apps = [[row[0]] for row in reader if row]

    # Drop the header row.
    return apps[1:]
|
|
|
|
def apps_preprocess(apps):
    """Download the Play Store details of every app using a thread pool.

    Args:
        apps: List of ``[package_id]`` lists.

    Returns:
        A list of ``[package_id, title, logo_src, genres]`` lists, sorted
        case-insensitively by title. Apps whose store page has no title,
        or whose download raised an error, are reported on stdout and left
        out of the result.
    """
    request_timeout = 30  # seconds; keeps a stalled request from hanging the build
    apps_new = []

    def app_download_details(app):
        """Fetch and scrape one store page; return its details list."""
        print('|--Downloading ', app[0])
        html_contents = requests.get(
            'https://play.google.com/store/apps/details?id={0}&hl={1}&gl={2}'.format(
                app[0], LANG, LOCATION),
            timeout=request_timeout)
        soup = BeautifulSoup(html_contents.text, 'html.parser')

        logo_img = soup.find('img', attrs={'itemprop': 'image',
                                           'alt': 'Icon image'})
        logo_src = logo_img['src'] if logo_img else ''

        title = soup.find('span', attrs={'itemprop': 'name'})
        title_text = title.text if title else 'NOT FOUND'

        cats = [ahref['aria-label']
                for ahref in soup.select('div[itemprop=genre] > a[aria-label]')]
        return [app[0], title_text, logo_src, cats]

    def on_downloaded(details):
        """Keep apps that have a title, report the others."""
        # The 'NOT FOUND' sentinel is stored in the title (index 1), not in
        # the logo (index 2).
        if details[1] != 'NOT FOUND':
            apps_new.append(details)
        else:
            print("|----> NOT FOUND: {}".format(details[0]))

    try:
        cpus = max(min(multiprocessing.cpu_count(), 8), 2)
    except NotImplementedError:
        cpus = 2  # default

    print("| Downloading {0} app details using {1} parallel threads ...".format(
        len(apps), cpus))

    pool = ThreadPool(processes=cpus)
    for app in apps:
        pool.apply_async(
            app_download_details, args=(app,),
            callback=on_downloaded,
            # Without an error_callback a failed download vanishes silently.
            # `app=app` binds the current app; a plain closure would see the
            # loop's last value.
            error_callback=lambda exc, app=app: print(
                "|----> FAILED: {0} ({1})".format(app[0], exc)))

    pool.close()
    pool.join()

    return sorted(apps_new, key=lambda x: x[1].lower())
|
|
|
|
def dist_json(apps, output_path):
    """Write the app list to a JSON file.

    Args:
        apps: List of ``[package_id, title, logo_src, genres]`` lists.
        output_path: Path of the JSON file to (over)write.
    """
    print('Saving json file...')
    json_data = [
        {
            'img_src': app[2],
            'name': app[1],
            'package_name': app[0],
            'genres': app[3],
        }
        for app in apps
    ]

    # ensure_ascii=False writes non-ASCII characters as-is, so the file
    # encoding must not depend on the platform default.
    with open(output_path, 'w', encoding='utf-8') as outfile:
        json.dump(json_data, outfile, indent=2, ensure_ascii=False)
|
|
|
|
def _csv_field(value, force_quotes=False):
    """Return ``value`` as one CSV field, quoted and escaped when required."""
    text = str(value)
    if force_quotes or any(ch in text for ch in ',"\r\n'):
        # Embedded quotes are escaped by doubling them.
        return '"{}"'.format(text.replace('"', '""'))
    return text


def dist_csv(apps, output_path):
    """Write the app list to a CSV file with an Icon,Name,Package,Genre header.

    Package and genre fields are always quoted; icon and name are quoted
    only when they contain a comma, quote or line break, so a value with
    such characters no longer corrupts its row.

    Args:
        apps: List of ``[package_id, title, logo_src, genres]`` lists.
        output_path: Path of the CSV file to (over)write.
    """
    print('Saving csv file...')
    with open(output_path, 'w', encoding='utf-8') as outfile:
        outfile.write("Icon,Name,Package,Genre\n")
        for app in apps:
            outfile.write("{0},{1},{2},{3}\n".format(
                _csv_field(app[2]),  # logo
                _csv_field(app[1]),  # name
                _csv_field(app[0], force_quotes=True),  # package
                _csv_field(','.join(app[3]), force_quotes=True)  # categories
            ))
|
|
|
|
def dist_readme(apps, template_path, package_path, output_path):
    """Render the README from the Markdown template.

    Replaces the version, build timestamp, apps table and apps count
    placeholders in the template and writes the result to ``output_path``.

    Args:
        apps: List of ``[package_id, title, logo_src, genres]`` lists.
        template_path: Path of the Markdown template file.
        package_path: Path of a JSON file with a ``version`` key.
        output_path: Path of the README file to (over)write.
    """
    print('Saving Markdown file...')
    with open(template_path, 'r', encoding='utf-8') as template:
        template_contents = template.read()

    rows = []
    for app in apps:
        # Request the smaller icon rendition for the table.
        logo_src = app[2].replace('=w240', '=w80') if len(app) > 3 else ''
        link = APP_LINK_PLACEHOLDER.format(app[1], app[0], LANG, LOCATION)
        # The icon goes in the first cell; previously logo_src was passed
        # to format() but never referenced, leaving the cell empty.
        rows.append('| ![]({0}) | {1} | {2} | {3}\n'.format(
            logo_src, link, app[0], ', '.join(app[3])))
    app_contents = ''.join(rows)

    with open(package_path, encoding='utf-8') as json_file:
        package = json.load(json_file)

    today = datetime.today()
    replacements = (
        (SRC_VERSION_PLACEHOLDER, package['version']),
        (SRC_TIMESTAMP_PLACEHOLDER, today.strftime('%b %d, %Y at %H:%M')),
        (SRC_APPS_PLACEHOLDER, app_contents),
        (SRC_APPSCOUNT_PLACEHOLDER, str(len(apps))),
    )
    for placeholder, value in replacements:
        template_contents = template_contents.replace(placeholder, value)

    # Open the output only once the content is complete, so a failure
    # above cannot leave a truncated README behind.
    with open(output_path, 'w', encoding='utf-8') as output:
        output.write(template_contents)
|
|
|
|
#############################################################################
|
|
# Main
|
|
if __name__ == "__main__":
    # Script entry point: parse the app ids, download their details and
    # write the README, JSON and CSV outputs. Any error is printed with its
    # traceback and the process exits with status 1.
    try:
        cur_path = os.path.dirname(os.path.realpath(__file__))
        csv_path = os.path.join(cur_path, 'src', SRC_CSV_FILE)
        dist_path = os.path.join(cur_path, 'dist')

        apps = apps_preprocess(csv_parse(csv_path))

        # The JSON/CSV writers do not create their parent directory.
        if not os.path.isdir(dist_path):
            os.makedirs(dist_path)

        dist_readme(apps, os.path.join(cur_path, 'src', SRC_MARKDOWN_FILE),
                    os.path.join(cur_path, 'package.json'),
                    os.path.join(cur_path, DIST_README))
        dist_json(apps, os.path.join(dist_path, DIST_JSON))
        dist_csv(apps, os.path.join(dist_path, DIST_CSV))

        print('Done.')
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        print("[ERROR] {0}".format(e))
        sys.exit(1)