mirror of https://github.com/sherlock-project/sherlock, synced 2025-02-17 04:58:28 +00:00
Add rank parameter to site_list.py
--rank or -r to update all page ranks
parent ad4c321f3f
commit 40fc51fc32
2 changed files with 14 additions and 8 deletions
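In practice the new switch is what triggers the rank refresh; the script would presumably be invoked along the lines of

python3 site_list.py --rank
python3 site_list.py -r

while running it without the flag still regenerates the site listing but leaves the stored ranks untouched.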
site_list.py (20 changed lines)
@@ -5,6 +5,7 @@ This module generates the listing of supported sites.
 import json
 import sys
 import requests
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
 from bs4 import BeautifulSoup as bs
 from datetime import datetime
 from collections import OrderedDict
@@ -15,7 +16,7 @@ def get_rank(domain_to_query):
     page = requests.get(url).text
     soup = bs(page, features="lxml")
     for span in soup.find_all('span'):
-        if span.has_attr("class"):
+        if span.has_attr("class"):
             if "globleRank" in span["class"]:
                 for strong in span.find_all("strong"):
                     if strong.has_attr("class"):
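Pieced together from the context lines above, and with the pieces this diff does not show marked as assumptions, get_rank is roughly the following sketch (the URL and the fallback value are guesses, not taken from the commit):

import requests
from bs4 import BeautifulSoup as bs

def get_rank(domain_to_query):
    result = -1                                                  # assumed fallback; not shown in the diff
    url = "https://www.alexa.com/siteinfo/" + domain_to_query    # assumed Alexa siteinfo URL; not shown either
    page = requests.get(url).text
    soup = bs(page, features="lxml")
    for span in soup.find_all('span'):                           # rank is rendered inside a "globleRank" span
        if span.has_attr("class"):
            if "globleRank" in span["class"]:
                for strong in span.find_all("strong"):
                    if strong.has_attr("class"):
                        # the diff skips one more class check here (old line 22)
                        result = int(strong.text.strip().replace(',', ''))
    return result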
@@ -23,6 +24,14 @@ def get_rank(domain_to_query):
                         result = int(strong.text.strip().replace(',', ''))
     return result
 
+parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter
+                        )
+parser.add_argument("--rank","-r",
+                    action="store_true", dest="rank", default=False,
+                    help="Update all website ranks (not recommended)."
+                    )
+args = parser.parse_args()
+
 with open("data.json", "r", encoding="utf-8") as data_file:
     data = json.load(data_file)
 
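Because the option is registered with action="store_true", args.rank ends up as a plain boolean that defaults to False. A small self-contained sketch of how the parser added above behaves (the sample invocations are illustrative only):

from argparse import ArgumentParser, RawDescriptionHelpFormatter

parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter)
parser.add_argument("--rank", "-r",
                    action="store_true", dest="rank", default=False,
                    help="Update all website ranks (not recommended).")

print(parser.parse_args([]).rank)           # False -> ranks are left as they are
print(parser.parse_args(["--rank"]).rank)   # True  -> every site's rank is re-fetched
print(parser.parse_args(["-r"]).rank)       # True  -> short form of the same flag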
@@ -34,13 +43,12 @@ with open("sites.md", "w") as site_file:
     for social_network in data:
         url_main = data.get(social_network).get("urlMain")
         site_file.write(f'{index}. [{social_network}]({url_main})\n')
-        sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
-        sys.stdout.flush()
-        data.get(social_network)["rank"] = get_rank(url_main)
+        if args.rank == True:
+            data.get(social_network)["rank"] = get_rank(url_main)
+            sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
+            sys.stdout.flush()
         index = index + 1
-
-    site_file.write(f'\nAlexa.com rank data fetched at {datetime.utcnow()} UTC\n')
 
 sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
 
 with open("data.json", "w") as data_file:
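The unchanged tail of the script serializes the (possibly re-ranked) dictionary back to data.json. A self-contained illustration of that serialization step, using two made-up entries purely to show the resulting shape:

import json

# hypothetical sample entries, for illustration only
data = {
    "Twitter": {"rank": 9, "urlMain": "https://www.twitter.com/"},
    "GitHub": {"rank": 72, "urlMain": "https://www.github.com/"},
}

# sort_keys=True keeps data.json alphabetized between runs, so regenerating the
# file produces small, reviewable diffs; indent=2 matches the layout the script writes
sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
with open("data.json", "w") as data_file:
    data_file.write(sorted_json_data)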
sites.md (2 changed lines)
@@ -132,5 +132,3 @@
 131. [devRant](https://devrant.com/)
 132. [iMGSRC.RU](https://imgsrc.ru/)
 133. [last.fm](https://last.fm/)
-
-Alexa.com rank data fetched at 2019-01-24 10:58:49.318475 UTC