Add Alexa.com rank updating functionality to site_list.py

This commit is contained in:
ptalmeida 2019-01-24 11:01:34 +00:00
parent 1ed24f5993
commit 46705ad0a1
3 changed files with 169 additions and 9 deletions

133
data.json
View file

@ -2,83 +2,98 @@
"500px": {
"errorMsg": "Sorry, no such page.",
"errorType": "message",
"rank": 2461,
"url": "https://500px.com/{}",
"urlMain": "https://500px.com/"
},
"9GAG": {
"errorType": "status_code",
"rank": 333,
"url": "https://9gag.com/u/{}",
"urlMain": "https://9gag.com/"
},
"About.me": {
"errorType": "status_code",
"rank": 12686,
"url": "https://about.me/{}",
"urlMain": "https://about.me/"
},
"Academia.edu": {
"errorMsg": "Page Not Found",
"errorType": "message",
"rank": 385,
"url": "https://independent.academia.edu/{}",
"urlMain": "https://www.academia.edu/"
},
"AngelList": {
"errorMsg": "We couldn't find what you were looking for.",
"errorType": "message",
"rank": 3469,
"url": "https://angel.co/{}",
"urlMain": "https://angel.co/"
},
"Aptoide": {
"errorType": "status_code",
"rank": 6107,
"url": "https://{}.en.aptoide.com/",
"urlMain": "https://en.aptoide.com/"
},
"AskFM": {
"errorType": "status_code",
"rank": 1109,
"url": "https://ask.fm/{}",
"urlMain": "https://ask.fm/"
},
"BLIP.fm": {
"errorMsg": "Page Not Found",
"errorType": "message",
"rank": 261919,
"url": "https://blip.fm/{}",
"urlMain": "https://blip.fm/"
},
"Badoo": {
"errorType": "status_code",
"rank": 949,
"url": "https://badoo.com/profile/{}",
"urlMain": "https://badoo.com/"
},
"Bandcamp": {
"errorMsg": "Sorry, that something isn\u2019t here",
"errorType": "message",
"rank": 573,
"url": "https://www.bandcamp.com/{}",
"urlMain": "https://www.bandcamp.com/"
},
"Basecamp": {
"errorMsg": "The account you were looking for doesn't exist",
"errorType": "message",
"rank": 1559,
"url": "https://{}.basecamphq.com",
"urlMain": "https://basecamp.com/"
},
"Behance": {
"errorMsg": "Oops! We can\u2019t find that page.",
"errorType": "message",
"rank": 394,
"url": "https://www.behance.net/{}",
"urlMain": "https://www.behance.net/"
},
"BitBucket": {
"errorType": "status_code",
"rank": 848,
"url": "https://bitbucket.org/{}",
"urlMain": "https://bitbucket.org/"
},
"BlackPlanet": {
"errorMsg": "My Hits",
"errorType": "message",
"rank": 107509,
"url": "http://blackplanet.com/{}",
"urlMain": "http://blackplanet.com/"
},
"Blogger": {
"errorType": "status_code",
"rank": 193,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.blogspot.com",
"urlMain": "https://www.blogger.com/"
@ -86,112 +101,132 @@
"BuzzFeed": {
"errorMsg": "We can't find the page you're looking for.",
"errorType": "message",
"rank": 294,
"url": "https://buzzfeed.com/{}",
"urlMain": "https://buzzfeed.com/"
},
"Canva": {
"errorMsg": "Not found (404)",
"errorType": "message",
"rank": 215,
"url": "https://www.canva.com/{}",
"urlMain": "https://www.canva.com/"
},
"Carbonmade": {
"errorMsg": "You've accidentally stumbled upon Mike's super secret nap grotto.",
"errorType": "message",
"rank": 32239,
"url": "https://{}.carbonmade.com",
"urlMain": "https://carbonmade.com/"
},
"CashMe": {
"errorType": "status_code",
"rank": 45066,
"url": "https://cash.me/{}",
"urlMain": "https://cash.me/"
},
"Cloob": {
"errorType": "status_code",
"rank": 8052,
"url": "https://www.cloob.com/name/{}",
"urlMain": "https://www.cloob.com/"
},
"Codecademy": {
"errorMsg": "404 error",
"errorType": "message",
"rank": 2314,
"url": "https://www.codecademy.com/{}",
"urlMain": "https://www.codecademy.com/"
},
"Codementor": {
"errorMsg": "404",
"errorType": "message",
"rank": 12456,
"url": "https://www.codementor.io/{}",
"urlMain": "https://www.codementor.io/"
},
"Codepen": {
"errorType": "status_code",
"rank": 863,
"url": "https://codepen.io/{}",
"urlMain": "https://codepen.io/"
},
"Coderwall": {
"errorMsg": "404! Our feels when that url is used",
"errorType": "message",
"rank": 17346,
"url": "https://coderwall.com/{}",
"urlMain": "https://coderwall.com/"
},
"ColourLovers": {
"errorMsg": "Page Not Loved",
"errorType": "message",
"rank": 30625,
"url": "https://www.colourlovers.com/love/{}",
"urlMain": "https://www.colourlovers.com/"
},
"Contently": {
"errorMsg": "We can't find that page!",
"errorType": "message",
"rank": 59032,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.contently.com/",
"urlMain": "https://contently.com/"
},
"Coroflot": {
"errorType": "status_code",
"rank": 37568,
"url": "https://www.coroflot.com/{}",
"urlMain": "https://coroflot.com/"
},
"CreativeMarket": {
"errorType": "status_code",
"rank": 1790,
"url": "https://creativemarket.com/{}",
"urlMain": "https://creativemarket.com/"
},
"Crevado": {
"errorType": "status_code",
"rank": 168903,
"url": "https://{}.crevado.com",
"urlMain": "https://crevado.com/"
},
"Crunchyroll": {
"errorType": "status_code",
"rank": 463,
"url": "https://www.crunchyroll.com/user/{}",
"urlMain": "https://www.crunchyroll.com/"
},
"DailyMotion": {
"errorType": "status_code",
"rank": 132,
"url": "https://www.dailymotion.com/{}",
"urlMain": "https://www.dailymotion.com/"
},
"Designspiration": {
"errorMsg": "Content Not Found",
"errorType": "message",
"rank": 24722,
"url": "https://www.designspiration.net/{}",
"urlMain": "https://www.designspiration.net/"
},
"DeviantART": {
"errorType": "status_code",
"rank": 185,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.deviantart.com",
"urlMain": "https://deviantart.com"
},
"Disqus": {
"errorType": "status_code",
"rank": 1311,
"url": "https://disqus.com/{}",
"urlMain": "https://disqus.com/"
},
"Dribbble": {
"errorMsg": "Whoops, that page is gone.",
"errorType": "message",
"rank": 937,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://dribbble.com/{}",
"urlMain": "https://dribbble.com/"
@ -199,66 +234,78 @@
"EVE Online": {
"errorMsg": "No results found with your search...",
"errorType": "message",
"rank": 11655,
"url": "https://evewho.com/search/{}",
"urlMain": "https://eveonline.com"
},
"Ebay": {
"errorMsg": "The User ID you entered was not found",
"errorType": "message",
"rank": 37,
"url": "https://www.ebay.com/usr/{}",
"urlMain": "https://www.ebay.com/"
},
"Ello": {
"errorMsg": "We couldn't find the page you're looking for",
"errorType": "message",
"rank": 29841,
"url": "https://ello.co/{}",
"urlMain": "https://ello.co/"
},
"Etsy": {
"errorType": "status_code",
"rank": 152,
"url": "https://www.etsy.com/shop/{}",
"urlMain": "https://www.etsy.com/"
},
"EyeEm": {
"errorType": "status_code",
"rank": 33189,
"url": "https://www.eyeem.com/u/{}",
"urlMain": "https://www.eyeem.com/"
},
"Facebook": {
"errorType": "status_code",
"rank": 3,
"regexCheck": "^[a-zA-Z0-9]{4,49}(?<!.com|.org|.net)$",
"url": "https://www.facebook.com/{}",
"urlMain": "https://www.facebook.com/"
},
"Flickr": {
"errorType": "status_code",
"rank": 361,
"url": "https://www.flickr.com/people/{}",
"urlMain": "https://www.flickr.com/"
},
"Flipboard": {
"errorMsg": "loading",
"errorType": "message",
"rank": 4494,
"regexCheck": "^([a-zA-Z0-9_]){1,15}$",
"url": "https://flipboard.com/@{}",
"urlMain": "https://flipboard.com/"
},
"Fotolog": {
"errorType": "status_code",
"rank": 46852,
"url": "https://fotolog.com/{}",
"urlMain": "https://fotolog.com/"
},
"Foursquare": {
"errorType": "status_code",
"rank": 2281,
"url": "https://foursquare.com/{}",
"urlMain": "https://foursquare.com/"
},
"Giphy": {
"errorType": "status_code",
"rank": 575,
"url": "https://giphy.com/{}",
"urlMain": "https://giphy.com/"
},
"GitHub": {
"errorType": "status_code",
"rank": 58,
"regexCheck": "^[a-zA-Z0-9](?:[a-zA-Z0-9]|-(?=[a-zA-Z0-9])){0,38}$",
"url": "https://www.github.com/{}",
"urlMain": "https://www.github.com/"
@ -266,190 +313,224 @@
"GitLab": {
"errorMsg": "You need to sign in or sign up before continuing.",
"errorType": "message",
"rank": 1764,
"url": "https://gitlab.com/{}",
"urlMain": "https://gitlab.com/"
},
"Gitee": {
"errorType": "status_code",
"rank": 3662,
"url": "https://gitee.com/{}",
"urlMain": "https://gitee.com/"
},
"GoodReads": {
"errorType": "status_code",
"rank": 384,
"url": "https://www.goodreads.com/{}",
"urlMain": "https://www.goodreads.com/"
},
"Google Plus": {
"errorType": "status_code",
"rank": 1,
"url": "https://plus.google.com/+{}",
"urlMain": "https://plus.google.com/"
},
"Gravatar": {
"errorType": "status_code",
"rank": 5621,
"url": "http://en.gravatar.com/{}",
"urlMain": "http://en.gravatar.com/"
},
"Gumroad": {
"errorMsg": "Page not found.",
"errorType": "message",
"rank": 4033,
"url": "https://www.gumroad.com/{}",
"urlMain": "https://www.gumroad.com/"
},
"HackerNews": {
"errorMsg": "No such user.",
"errorType": "message",
"rank": 3035,
"url": "https://news.ycombinator.com/user?id={}",
"urlMain": "https://news.ycombinator.com/"
},
"HackerOne": {
"errorMsg": "Page not found",
"errorType": "message",
"rank": 39153,
"url": "https://hackerone.com/{}",
"urlMain": "https://hackerone.com/"
},
"House-Mixes.com": {
"errorMsg": "Profile Not Found",
"errorType": "message",
"rank": 126875,
"url": "https://www.house-mixes.com/profile/{}",
"urlMain": "https://www.house-mixes.com/"
},
"Houzz": {
"errorMsg": "The page you requested was not found.",
"errorType": "message",
"rank": 2280,
"url": "https://houzz.com/user/{}",
"urlMain": "https://houzz.com/"
},
"HubPages": {
"errorType": "status_code",
"rank": 9948,
"url": "https://hubpages.com/@{}",
"urlMain": "https://hubpages.com/"
},
"IFTTT": {
"errorMsg": "The requested page or file does not exist",
"errorType": "message",
"rank": 4530,
"url": "https://www.ifttt.com/p/{}",
"urlMain": "https://www.ifttt.com/"
},
"ImageShack": {
"errorType": "response_url",
"errorUrl": "https://imageshack.us/",
"rank": 38014,
"url": "https://imageshack.us/user/{}",
"urlMain": "https://imageshack.us/"
},
"Imgur": {
"errorType": "status_code",
"rank": 63,
"url": "https://imgur.com/user/{}",
"urlMain": "https://imgur.com/"
},
"Instagram": {
"errorMsg": "The link you followed may be broken",
"errorType": "message",
"rank": 16,
"url": "https://www.instagram.com/{}",
"urlMain": "https://www.instagram.com/"
},
"Instructables": {
"errorMsg": "404: We're sorry, things break sometimes",
"errorType": "message",
"rank": 1011,
"url": "https://www.instructables.com/member/{}",
"urlMain": "https://www.instructables.com/"
},
"Issuu": {
"errorType": "status_code",
"rank": 901,
"url": "https://issuu.com/{}",
"urlMain": "https://issuu.com/"
},
"Itch.io": {
"errorType": "status_code",
"rank": 2293,
"url": "https://{}.itch.io/",
"urlMain": "https://itch.io/"
},
"Jimdo": {
"errorType": "status_code",
"noPeriod": "True",
"rank": 104890,
"url": "https://{}.jimdosite.com",
"urlMain": "https://jimdosite.com/"
},
"Kaggle": {
"errorType": "status_code",
"rank": 2714,
"url": "https://www.kaggle.com/{}",
"urlMain": "https://www.kaggle.com/"
},
"KanoWorld": {
"errorType": "status_code",
"rank": 72187,
"url": "https://api.kano.me/progress/user/{}",
"urlMain": "https://world.kano.me/"
},
"Keybase": {
"errorType": "status_code",
"rank": 101635,
"url": "https://keybase.io/{}",
"urlMain": "https://keybase.io/"
},
"Kik": {
"errorMsg": "The page you requested was not found",
"errorType": "message",
"rank": 323474,
"url": "https://ws2.kik.com/user/{}",
"urlMain": "http://kik.me/"
},
"Kongregate": {
"errorMsg": "Sorry, no account with that name was found.",
"errorType": "message",
"rank": 1955,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://www.kongregate.com/accounts/{}",
"urlMain": "https://www.kongregate.com/"
},
"Launchpad": {
"errorType": "status_code",
"rank": 7392,
"url": "https://launchpad.net/~{}",
"urlMain": "https://launchpad.net/"
},
"Letterboxd": {
"errorMsg": "Sorry, we can\u2019t find the page you\u2019ve requested.",
"errorType": "message",
"rank": 2590,
"url": "https://letterboxd.com/{}",
"urlMain": "https://letterboxd.com/"
},
"LiveJournal": {
"errorMsg": "Unknown Journal",
"errorType": "message",
"rank": 223,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.livejournal.com",
"urlMain": "https://www.livejournal.com/"
},
"Mastodon": {
"errorType": "status_code",
"rank": 978330,
"url": "https://mstdn.io/@{}",
"urlMain": "https://mstdn.io/"
},
"Medium": {
"errorType": "status_code",
"rank": 216,
"url": "https://medium.com/@{}",
"urlMain": "https://medium.com/"
},
"MeetMe": {
"errorType": "response_url",
"errorUrl": "https://www.meetme.com/",
"rank": 14535,
"url": "https://www.meetme.com/{}",
"urlMain": "https://www.meetme.com/"
},
"MixCloud": {
"errorMsg": "Page Not Found",
"errorType": "message",
"rank": 3187,
"url": "https://www.mixcloud.com/{}",
"urlMain": "https://www.mixcloud.com/"
},
"MyAnimeList": {
"errorType": "status_code",
"rank": 477,
"url": "https://myanimelist.net/profile/{}",
"urlMain": "https://myanimelist.net/"
},
"NameMC (Minecraft.net skins)": {
"errorMsg": "Profiles: 0 results",
"errorType": "message",
"rank": 5988,
"url": "https://namemc.com/profile/{}",
"urlMain": "https://namemc.com/"
},
"Newgrounds": {
"errorType": "status_code",
"rank": 2479,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.newgrounds.com",
"urlMain": "https://newgrounds.com"
@ -457,294 +538,346 @@
"Pastebin": {
"errorType": "response_url",
"errorUrl": "https://pastebin.com/index",
"rank": 1043,
"url": "https://pastebin.com/u/{}",
"urlMain": "https://pastebin.com/"
},
"Patreon": {
"errorType": "status_code",
"rank": 339,
"url": "https://www.patreon.com/{}",
"urlMain": "https://www.patreon.com/"
},
"Pexels": {
"errorMsg": "Ouch, something went wrong!",
"errorType": "message",
"rank": 614,
"url": "https://www.pexels.com/@{}",
"urlMain": "https://www.pexels.com/"
},
"Photobucket": {
"errorType": "status_code",
"rank": 3331,
"url": "https://photobucket.com/user/{}/library",
"urlMain": "https://photobucket.com/"
},
"Pinterest": {
"errorType": "response_url",
"errorUrl": "https://www.pinterest.com/?show_error=true",
"rank": 76,
"url": "https://www.pinterest.com/{}/",
"urlMain": "https://www.pinterest.com/"
},
"Pixabay": {
"errorType": "status_code",
"rank": 403,
"url": "https://pixabay.com/en/users/{}",
"urlMain": "https://pixabay.com/"
},
"Plug.DJ": {
"errorType": "status_code",
"rank": 34127,
"url": "https://plug.dj/@/{}",
"urlMain": "https://plug.dj/"
},
"ProductHunt": {
"errorMsg": "Product Hunt is a curation of the best new products",
"errorType": "message",
"rank": 4138,
"url": "https://www.producthunt.com/@{}",
"urlMain": "https://www.producthunt.com/"
},
"Quora": {
"errorType": "status_code",
"rank": 91,
"url": "https://www.quora.com/profile/{}",
"urlMain": "https://www.quora.com/"
},
"Rajce.net": {
"errorMsg": "410",
"errorType": "message",
"rank": 1187,
"url": "https://{}.rajce.idnes.cz/",
"urlMain": "https://www.rajce.idnes.cz/"
},
"Reddit": {
"errorMsg": "page not found",
"errorType": "message",
"rank": 17,
"url": "https://www.reddit.com/user/{}",
"urlMain": "https://www.reddit.com/"
},
"Repl.it": {
"errorMsg": "404",
"errorType": "message",
"rank": 8926,
"url": "https://repl.it/@{}",
"urlMain": "https://repl.it/"
},
"ReverbNation": {
"errorMsg": "Sorry, we couldn't find that page",
"errorType": "message",
"rank": 10610,
"url": "https://www.reverbnation.com/{}",
"urlMain": "https://www.reverbnation.com/"
},
"Roblox": {
"errorMsg": "Page cannot be found or no longer exists",
"errorType": "message",
"rank": 105,
"url": "https://www.roblox.com/user.aspx?username={}",
"urlMain": "https://www.roblox.com/"
},
"Scribd": {
"errorMsg": "Page not found",
"errorType": "message",
"rank": 206,
"url": "https://www.scribd.com/{}",
"urlMain": "https://www.scribd.com/"
},
"Slack": {
"errorType": "status_code",
"rank": 244,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.slack.com",
"urlMain": "https://slack.com"
},
"SlideShare": {
"errorType": "status_code",
"rank": 180,
"url": "https://slideshare.net/{}",
"urlMain": "https://slideshare.net/"
},
"Smashcast": {
"errorType": "status_code",
"rank": 101679,
"url": "https://www.smashcast.tv/api/media/live/{}",
"urlMain": "https://www.smashcast.tv/"
},
"SoundCloud": {
"errorType": "status_code",
"rank": 113,
"url": "https://soundcloud.com/{}",
"urlMain": "https://soundcloud.com/"
},
"SourceForge": {
"errorType": "status_code",
"rank": 368,
"url": "https://sourceforge.net/u/{}",
"urlMain": "https://sourceforge.net/"
},
"Spotify": {
"errorType": "status_code",
"rank": 106,
"url": "https://open.spotify.com/user/{}",
"urlMain": "https://open.spotify.com/"
},
"Star Citizen": {
"errorType": "status_code",
"rank": 4694,
"url": "https://robertsspaceindustries.com/citizens/{}",
"urlMain": "https://robertsspaceindustries.com/"
},
"Steam": {
"errorMsg": "The specified profile could not be found",
"errorType": "message",
"rank": 164,
"url": "https://steamcommunity.com/id/{}",
"urlMain": "https://steamcommunity.com/"
},
"StreamMe": {
"errorType": "status_code",
"rank": 21153,
"url": "https://www.stream.me/{}",
"urlMain": "https://www.stream.me/"
},
"Taringa": {
"errorMsg": "Lo que est\u00e1s buscando no est\u00e1 por aqu\u00ed.",
"errorType": "message",
"rank": 935,
"url": "https://www.taringa.net/{}",
"urlMain": "https://taringa.net/"
},
"Telegram": {
"errorMsg": "<meta property=\"twitter:title\" content=\"Telegram: Contact",
"errorType": "message",
"rank": 1032,
"url": "https://t.me/{}",
"urlMain": "https://t.me/"
},
"Tinder": {
"errorMsg": "Looking for Someone?",
"errorType": "message",
"rank": 1368,
"url": "https://www.gotinder.com/@{}",
"urlMain": "https://tinder.com/"
},
"TradingView": {
"errorType": "status_code",
"rank": 592,
"url": "https://www.tradingview.com/u/{}/",
"urlMain": "https://www.tradingview.com/"
},
"Trakt": {
"errorMsg": "404",
"errorType": "message",
"rank": 5944,
"url": "https://www.trakt.tv/users/{}",
"urlMain": "https://www.trakt.tv/"
},
"Trip": {
"errorMsg": "Page not found",
"errorType": "message",
"rank": 2956,
"url": "https://www.trip.skyscanner.com/user/{}",
"urlMain": "https://www.trip.skyscanner.com/"
},
"TripAdvisor": {
"errorMsg": "This page is on vacation\u2026",
"errorType": "message",
"rank": 283,
"url": "https://tripadvisor.com/members/{}",
"urlMain": "https://tripadvisor.com/"
},
"Twitter": {
"errorMsg": "page doesn\u2019t exist",
"errorType": "message",
"rank": 11,
"url": "https://www.twitter.com/{}",
"urlMain": "https://www.twitter.com/"
},
"Unsplash": {
"errorType": "status_code",
"rank": 598,
"url": "https://unsplash.com/@{}",
"urlMain": "https://unsplash.com/"
},
"VK": {
"errorType": "status_code",
"rank": 19,
"url": "https://vk.com/{}",
"urlMain": "https://vk.com/"
},
"VSCO": {
"errorType": "status_code",
"rank": 3414,
"url": "https://vsco.co/{}",
"urlMain": "https://vsco.co/"
},
"Venmo": {
"errorType": "status_code",
"rank": 5004,
"url": "https://venmo.com/{}",
"urlMain": "https://venmo.com/"
},
"Vimeo": {
"errorMsg": "404 Not Found",
"errorType": "message",
"rank": 143,
"url": "https://vimeo.com/{}",
"urlMain": "https://vimeo.com/"
},
"VirusTotal": {
"errorMsg": "not found",
"errorType": "message",
"rank": 4265,
"url": "https://www.virustotal.com/ui/users/{}/trusted_users",
"urlMain": "https://www.virustotal.com/"
},
"Wattpad": {
"errorMsg": "This page seems to be missing...",
"errorType": "message",
"rank": 516,
"url": "https://www.wattpad.com/user/{}",
"urlMain": "https://www.wattpad.com/"
},
"We Heart It": {
"errorMsg": "Oops! You've landed on a moving target!",
"errorType": "message",
"rank": 3415,
"url": "https://weheartit.com/{}",
"urlMain": "https://weheartit.com/"
},
"WebNode": {
"errorMsg": "Ztratili jste se?",
"errorType": "message",
"rank": 16094,
"url": "https://{}.webnode.cz/",
"urlMain": "https://www.webnode.cz/"
},
"Wikia": {
"errorMsg": "does not exist",
"errorType": "message",
"rank": 70,
"url": "https://wikia.com/wiki/User:{}",
"urlMain": "http://www.wikia.com/"
},
"Wikipedia": {
"errorMsg": "If a page was recently created here, it may not be visible yet because of a delay in updating the database",
"errorType": "message",
"rank": 5,
"url": "https://www.wikipedia.org/wiki/User:{}",
"urlMain": "https://www.wikipedia.org/"
},
"Wix": {
"errorType": "status_code",
"rank": 416,
"url": "https://{}.wix.com",
"urlMain": "https://wix.com/"
},
"WordPress": {
"errorType": "response_url",
"errorUrl": "wordpress.com/typo/?subdomain=",
"rank": 60,
"regexCheck": "^[a-zA-Z][a-zA-Z0-9_-]*$",
"url": "https://{}.wordpress.com/",
"urlMain": "https://wordpress.com"
},
"YouPic": {
"errorType": "status_code",
"rank": 40894,
"url": "https://youpic.com/photographer/{}",
"urlMain": "https://youpic.com/"
},
"YouTube": {
"errorMsg": "Not Found",
"errorType": "message",
"rank": 2,
"url": "https://www.youtube.com/{}",
"urlMain": "https://www.youtube.com/"
},
"Younow": {
"errorMsg": "pageTitle || 'YouNow - Broadcast Live",
"errorType": "message",
"rank": 13432,
"url": "https://www.younow.com/{}",
"urlMain": "https://www.younow.com/"
},
"Zhihu": {
"errorMsg": "404",
"errorType": "message",
"rank": 85,
"url": "https://www.zhihu.com/people/{}",
"urlMain": "https://www.zhihu.com/"
},
"devRant": {
"errorType": "response_url",
"errorUrl": "https://devrant.com/",
"rank": 148214,
"url": "https://devrant.com/users/{}",
"urlMain": "https://devrant.com/"
},
"iMGSRC.RU": {
"errorType": "response_url",
"errorUrl": "https://imgsrc.ru/",
"rank": 3619,
"url": "https://imgsrc.ru/main/user.php?user={}",
"urlMain": "https://imgsrc.ru/"
},
"last.fm": {
"errorMsg": "Whoops! Sorry, but this page doesn't exist.",
"errorType": "message",
"rank": 1192,
"url": "https://last.fm/user/{}",
"urlMain": "https://last.fm/"
}

View file

@ -3,22 +3,47 @@
This module generates the listing of supported sites.
"""
import json
import sys
import requests
from bs4 import BeautifulSoup as bs
from datetime import datetime
def get_rank(domain_to_query, timeout=30):
    """Fetch the Alexa.com global rank for a site.

    Downloads the Alexa "siteinfo" page for ``domain_to_query`` and scrapes
    the number held in a ``<strong class="metrics-data">`` element nested
    inside a ``<span class="globleRank">`` element.

    Args:
        domain_to_query: Text appended verbatim to the Alexa siteinfo URL.
        timeout: Seconds to wait for Alexa before giving up. Added with a
            default so existing one-argument calls keep working.

    Returns:
        The rank as an ``int``, or ``-1`` when the page contains no
        parsable rank.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    result = -1
    url = "http://www.alexa.com/siteinfo/" + domain_to_query
    # Without a timeout one stalled connection would block the whole run.
    page = requests.get(url, timeout=timeout).text
    soup = bs(page, features="lxml")

    # "globleRank" (sic) is the class name this scraper has always matched;
    # presumably it mirrors a typo in Alexa's markup - verify before
    # "correcting" the spelling.
    for span in soup.find_all("span", class_="globleRank"):
        for strong in span.find_all("strong", class_="metrics-data"):
            rank_text = strong.text.strip().replace(",", "")
            try:
                result = int(rank_text)
            except ValueError:
                # Non-numeric text (e.g. a placeholder for an unranked
                # site) must not abort the update of every other site.
                continue
    return result
# NOTE(review): this span comes from a rendered diff hunk whose indentation
# and +/- markers were stripped, so the previous and the updated versions of
# the script appear interleaved. Confirm against the real site_list.py
# before changing any statement here.

# Load the site definitions from data.json.
with open("data.json", "r", encoding="utf-8") as data_file:
data = json.load(data_file)
# Rewrite sites.md as a numbered listing; the same loop also refreshes each
# site's "rank" entry in `data` through get_rank().
with open("sites.md", "w") as site_file:
data_length = len(data)
site_file.write(f'## List Of Supported Sites ({data_length} Sites In Total!)\n')
index = 1
for social_network in data:
url_main = data.get(social_network).get("urlMain")
site_file.write(f'{index}. [{social_network}]({url_main})\n')
# "\r" moves the cursor back to the start of the line, so each progress
# message overwrites the previous one instead of scrolling.
sys.stdout.write("\r{0}".format(f"Updated {index} out of {data_length} entries"))
sys.stdout.flush()
# get_rank() is handed the full "urlMain" value (scheme included), not a
# bare domain name.
data.get(social_network)["rank"] = get_rank(url_main)
index = index + 1
# Append the UTC time at which the ranks were fetched to the listing.
site_file.write(f'\nAlexa.com rank data fetched at ({datetime.utcnow()} UTC)\n')
# Serialize the site data with sorted keys and write it back to data.json.
sorted_json_data = json.dumps(data, indent=2, sort_keys=True)
with open("data.json", "w") as data_file:
data_file.write(sorted_json_data)
# NOTE(review): from here on the sites.md listing is repeated without the
# rank refresh, and the closing print appears twice - presumably these are
# the lines this commit removed, shown next to their replacements; confirm.
with open("sites.md", "w") as site_file:
site_file.write(f'## List Of Supported Sites ({len(data)} Sites In Total!)\n')
print("\nFinished updating supported site listing!")
index = 1
for social_network in data:
url_main = data.get(social_network).get("urlMain")
site_file.write(f'{index}. [{social_network}]({url_main})\n')
index = index + 1
print("Finished updating supported site listing!")

View file

@ -132,3 +132,5 @@
131. [devRant](https://devrant.com/)
132. [iMGSRC.RU](https://imgsrc.ru/)
133. [last.fm](https://last.fm/)
Alexa.com rank data fetched at (2019-01-24 10:58:49.318475 UTC)