2018-12-30 17:50:10 +00:00
|
|
|
"""Sherlock: Supported Site Listing
|
|
|
|
This module generates the listing of supported sites.
|
|
|
|
"""
|
2018-12-29 23:16:43 +00:00
|
|
|
import json
|
2019-01-24 11:01:34 +00:00
|
|
|
import sys
|
|
|
|
import requests
|
2019-01-27 09:31:55 +00:00
|
|
|
import threading
|
2019-05-28 12:26:30 +00:00
|
|
|
import xml.etree.ElementTree as ET
|
2019-01-27 09:31:55 +00:00
|
|
|
from datetime import datetime
|
2019-01-27 09:50:45 +00:00
|
|
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
2019-01-27 09:31:55 +00:00
|
|
|
|
|
|
|
# Work list of (site name, main URL, rank thread or None) tuples, appended
# to in site order by the listing loop further down in this script.
pool = []
|
|
|
|
|
|
|
|
def get_rank(domain_to_query, dest, timeout=30):
    """Fetch the Alexa rank for a site and store it in ``dest['rank']``.

    This function is used as a thread target by the script below, so the
    expected failure modes (network error, malformed XML, missing or
    non-numeric rank) are reported on stdout rather than raised. On any
    such failure ``dest`` is left unmodified.

    Args:
        domain_to_query: Value interpolated into the ``url`` query
            parameter of the Alexa data request.
        dest: Mutable mapping that receives the integer rank under the
            ``"rank"`` key.
        timeout: Seconds to wait for the HTTP response before giving up,
            so a stalled connection cannot block the caller's ``join()``
            indefinitely.

    Returns:
        None.
    """
    # Retrieve ranking data via the Alexa API.
    url = f"http://data.alexa.com/data?cli=10&url={domain_to_query}"
    try:
        xml_data = requests.get(url, timeout=timeout).text
    except requests.RequestException as error:
        # No response body exists to show, so report the request error.
        print(f"Error retrieving rank information for '{domain_to_query}'")
        print(f" Request failed: {error}")
        return

    try:
        # Parsing lives inside the try so malformed XML is reported the
        # same way as a response that lacks the rank.
        root = ET.fromstring(xml_data)
        # Get ranking for this site.
        dest['rank'] = int(root.find(".//REACH").attrib["RANK"])
    except (ET.ParseError, AttributeError, KeyError, ValueError):
        # ParseError: body is not well-formed XML.
        # AttributeError: no REACH element (find() returned None).
        # KeyError: REACH has no RANK attribute.
        # ValueError: RANK is not an integer.
        print(f"Error retrieving rank information for '{domain_to_query}'")
        print(f" Returned XML is |{xml_data}|")
|
2019-01-27 09:50:45 +00:00
|
|
|
|
|
|
|
# Command-line interface: the only option is the opt-in rank refresh flag.
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
parser.add_argument(
    "--rank", "-r",
    dest="rank",
    action="store_true",
    default=False,
    help="Update all website ranks (not recommended).",
)
args = parser.parse_args()
|
2019-01-24 12:35:08 +00:00
|
|
|
|
2019-10-27 03:42:03 +00:00
|
|
|
# Load the site database: a JSON object keyed by site name, decoded as UTF-8.
with open("sherlock/resources/data.json", encoding="utf-8") as data_file:
    data = json.loads(data_file.read())
|
2018-12-29 23:16:43 +00:00
|
|
|
|
2019-01-24 11:01:34 +00:00
|
|
|
# Regenerate sites.md from the loaded site data. UTF-8 is requested
# explicitly so the listing is written with the same encoding the data was
# read with, instead of whatever the platform default happens to be.
with open("sites.md", "w", encoding="utf-8") as site_file:
    data_length = len(data)
    site_file.write(f'## List Of Supported Sites ({data_length} Sites In Total!)\n')

    for social_network, site_info in data.items():
        url_main = site_info.get("urlMain")
        # NOTE(review): the stored rank is reset to 0 even when --rank is
        # not given, so a plain run discards existing ranks. Confirm this
        # is intended.
        site_info["rank"] = 0
        th = None
        if args.rank:
            # One worker per site; it writes the fetched rank back into
            # site_info, which is the same dict object held by data.
            th = threading.Thread(target=get_rank, args=(url_main, site_info))
            th.start()
        pool.append((social_network, url_main, th))

    for index, (social_network, url_main, th) in enumerate(pool, start=1):
        if th is not None:
            # Wait for this site's rank lookup before reporting progress.
            th.join()
        site_file.write(f'{index}. [{social_network}]({url_main})\n')
        # "\r" rewrites the same console line to show a running counter.
        sys.stdout.write(f"\rUpdated {index} out of {data_length} entries")
        sys.stdout.flush()

    if args.rank:
        site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
|
2019-01-27 09:31:55 +00:00
|
|
|
|
2019-01-23 14:14:37 +00:00
|
|
|
# Serialize the site data (including the rank values set above) with sorted
# keys and a two-space indent.
sorted_json_data = json.dumps(data, indent=2, sort_keys=True)

# Write back with the same explicit encoding used when the file was read,
# rather than relying on the platform default.
with open("sherlock/resources/data.json", "w", encoding="utf-8") as data_file:
    data_file.write(sorted_json_data)

print("\nFinished updating supported site listing!")
|