Merge pull request #1083 from jjlin/global-domains

Add a script to auto-generate the global equivalent domains JSON file
This commit is contained in:
Daniel García 2020-08-08 16:19:30 +02:00 committed by GitHub
commit 83dff9ae6e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 110 additions and 41 deletions

View file

@ -39,8 +39,7 @@
"Type": 1,
"Domains": [
"apple.com",
"icloud.com",
"tv.apple.com"
"icloud.com"
],
"Excluded": false
},
@ -106,6 +105,7 @@
"passport.net",
"windows.com",
"microsoftonline.com",
"office.com",
"office365.com",
"microsoftstore.com",
"xbox.com",
@ -193,7 +193,12 @@
"amazon.it",
"amazon.com.au",
"amazon.co.nz",
"amazon.in"
"amazon.in",
"amazon.com.mx",
"amazon.nl",
"amazon.sg",
"amazon.com.tr",
"amazon.ae"
],
"Excluded": false
},
@ -386,8 +391,7 @@
"alibaba.com",
"aliexpress.com",
"aliyun.com",
"net.cn",
"www.net.cn"
"net.cn"
],
"Excluded": false
},
@ -717,41 +721,27 @@
"eventbrite.ca",
"eventbrite.ch",
"eventbrite.cl",
"eventbrite.co.id",
"eventbrite.co.in",
"eventbrite.co.kr",
"eventbrite.co",
"eventbrite.co.nz",
"eventbrite.co.uk",
"eventbrite.co.ve",
"eventbrite.com",
"eventbrite.com.ar",
"eventbrite.com.au",
"eventbrite.com.bo",
"eventbrite.com.br",
"eventbrite.com.co",
"eventbrite.com.hk",
"eventbrite.com.hn",
"eventbrite.com.mx",
"eventbrite.com.pe",
"eventbrite.com.sg",
"eventbrite.com.tr",
"eventbrite.com.tw",
"eventbrite.cz",
"eventbrite.de",
"eventbrite.dk",
"eventbrite.es",
"eventbrite.fi",
"eventbrite.fr",
"eventbrite.gy",
"eventbrite.hu",
"eventbrite.hk",
"eventbrite.ie",
"eventbrite.is",
"eventbrite.it",
"eventbrite.jp",
"eventbrite.mx",
"eventbrite.nl",
"eventbrite.no",
"eventbrite.pl",
"eventbrite.pt",
"eventbrite.ru",
"eventbrite.se"
"eventbrite.se",
"eventbrite.sg"
],
"Excluded": false
},
@ -769,15 +759,6 @@
},
{
"Type": 75,
"Domains": [
"netcup.de",
"netcup.eu",
"customercontrolpanel.de"
],
"Excluded": false
},
{
"Type": 76,
"Domains": [
"docusign.com",
"docusign.net"
@ -785,7 +766,7 @@
"Excluded": false
},
{
"Type": 77,
"Type": 76,
"Domains": [
"envato.com",
"themeforest.net",
@ -799,7 +780,7 @@
"Excluded": false
},
{
"Type": 78,
"Type": 77,
"Domains": [
"x10hosting.com",
"x10premium.com"
@ -807,7 +788,7 @@
"Excluded": false
},
{
"Type": 79,
"Type": 78,
"Domains": [
"dnsomatic.com",
"opendns.com",
@ -816,7 +797,7 @@
"Excluded": false
},
{
"Type": 80,
"Type": 79,
"Domains": [
"cagreatamerica.com",
"canadaswonderland.com",
@ -835,11 +816,19 @@
"Excluded": false
},
{
"Type": 81,
"Type": 80,
"Domains": [
"ubnt.com",
"ui.com"
],
"Excluded": false
},
{
"Type": 81,
"Domains": [
"discordapp.com",
"discord.com"
],
"Excluded": false
}
]
]

80
tools/global_domains.py Executable file
View file

@ -0,0 +1,80 @@
#!/usr/bin/env python3
#
# This script generates a global equivalent domains JSON file from
# the upstream Bitwarden source repo.
#
import json
import re
import sys
import urllib.request
from collections import OrderedDict
BASE_URL = 'https://github.com/bitwarden/server/raw/master'
ENUMS_URL = '%s/src/Core/Enums/GlobalEquivalentDomainsType.cs' % BASE_URL
DOMAIN_LISTS_URL = '%s/src/Core/Utilities/StaticStore.cs' % BASE_URL

# Enum lines look like:
#
#   EnumName0 = 0,
#   EnumName1 = 1,
#
ENUM_RE = re.compile(
    r'\s*'              # Leading whitespace (optional).
    r'([_0-9a-zA-Z]+)'  # Enum name (capture group 1).
    r'\s*=\s*'          # '=' with optional surrounding whitespace.
    r'([0-9]+)'         # Enum value (capture group 2).
)

# Global domains lines look like:
#
#   GlobalDomains.Add(GlobalEquivalentDomainsType.EnumName, new List<string> { "x.com", "y.com" });
#
DOMAIN_LIST_RE = re.compile(
    r'\s*'              # Leading whitespace (optional).
    r'GlobalDomains\.Add\(GlobalEquivalentDomainsType\.'
    r'([_0-9a-zA-Z]+)'  # Enum name (capture group 1).
    r'\s*,\s*new List<string>\s*{'
    r'([^}]+)'          # Domain list (capture group 2).
    r'}\);'
)


def fetch_text(url):
    """Download *url* and return the response body decoded as UTF-8."""
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def parse_enums(text):
    """Return a dict mapping enum names to their integer values.

    Every line of *text* that starts with ``Name = <digits>`` (after
    optional whitespace) contributes one entry; other lines are ignored.
    """
    enums = dict()
    for ln in text.split('\n'):
        m = ENUM_RE.match(ln)
        if m:
            enums[m.group(1)] = int(m.group(2))
    return enums


def parse_domain_lists(text):
    """Return an OrderedDict mapping enum names to lists of domains.

    Entries appear in the order their ``GlobalDomains.Add(...)`` lines
    occur in *text*. Lines that do not match DOMAIN_LIST_RE are ignored.
    """
    domain_lists = OrderedDict()
    for ln in text.split('\n'):
        m = DOMAIN_LIST_RE.match(ln)
        if m:
            # Strip double quotes and extraneous whitespace in each domain.
            domains = [d.strip(' \t"') for d in m.group(2).split(',')]
            # A trailing comma in the source list would otherwise leave an
            # empty string behind as a bogus domain.
            domain_lists[m.group(1)] = [d for d in domains if d]
    return domain_lists


def build_global_domains(enums, domain_lists):
    """Build the list of global-domain entries to be serialized as JSON.

    Each entry is an OrderedDict with the keys "Type", "Domains" and
    "Excluded" (always False), in that order.

    Raises:
        KeyError: if a name in *domain_lists* has no value in *enums*.
            The message names every missing enum, not just the first.
    """
    missing = [name for name in domain_lists if name not in enums]
    if missing:
        raise KeyError('no enum value found for: %s' % ', '.join(missing))

    global_domains = []
    for name, domain_list in domain_lists.items():
        entry = OrderedDict()
        entry["Type"] = enums[name]
        entry["Domains"] = domain_list
        entry["Excluded"] = False
        global_domains.append(entry)
    return global_domains


def main(argv):
    """Run the script; *argv* is the full argument vector (like sys.argv).

    Returns the process exit status: 0 on success, 1 on a usage error or
    when the downloaded sources could not be turned into a domain list.
    """
    if len(argv) != 2:
        print("usage: %s <OUTPUT-FILE>" % argv[0], file=sys.stderr)
        print(file=sys.stderr)
        print("This script generates a global equivalent domains JSON file from",
              file=sys.stderr)
        print("the upstream Bitwarden source repo.", file=sys.stderr)
        return 1

    output_file = argv[1]

    # Read in the enum names and values, then the domain lists.
    enums = parse_enums(fetch_text(ENUMS_URL))
    domain_lists = parse_domain_lists(fetch_text(DOMAIN_LISTS_URL))

    # If nothing matched, the downloaded file no longer has the expected
    # shape; refuse to overwrite the output file with an empty list.
    if not domain_lists:
        print("error: no global domain lists found in %s" % DOMAIN_LISTS_URL,
              file=sys.stderr)
        return 1

    # Build the global domains data structure.
    try:
        global_domains = build_global_domains(enums, domain_lists)
    except KeyError as err:
        print("error: %s" % err.args[0], file=sys.stderr)
        return 1

    # Write out the global domains JSON file.
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(global_domains, f, indent=2)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))