Merge pull request #2099 from ppfeister/feature/schema

Set schema for manifest
This commit is contained in:
Siddharth Dushantha 2024-05-07 19:25:59 +00:00 committed by GitHub
commit d0c8282e5e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 97 additions and 11 deletions

View file

@ -1,4 +1,5 @@
{
"$schema": "data.schema.json",
"1337x": {
"errorMsg": [
"<title>Error something went wrong.</title>",
@ -498,8 +499,7 @@
"url": "https://codeforces.com/profile/{}",
"urlMain": "https://codeforces.com/",
"urlProbe": "https://codeforces.com/api/user.info?handles={}",
"username_claimed": "tourist",
"username_unclaimed": "noonewouldeverusethis7"
"username_claimed": "tourist"
},
"Codepen": {
"errorType": "status_code",
@ -996,7 +996,7 @@
"username_claimed": "naveennamani877"
},
"HackerNews": {
"::::README::::": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.",
"__comment__": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.",
"errorMsg": [
"No such user.",
"Sorry."
@ -1098,8 +1098,7 @@
"errorUrl": "https://irc-galleria.net/users/search?username={}",
"url": "https://irc-galleria.net/user/{}",
"urlMain": "https://irc-galleria.net/",
"username_claimed": "appas",
"username_unclaimed": "noonewouldeverusethis77"
"username_claimed": "appas"
},
"Icons8 Community": {
"errorType": "status_code",
@ -1186,7 +1185,6 @@
},
"Jimdo": {
"errorType": "status_code",
"noPeriod": "True",
"regexCheck": "^[a-zA-Z0-9@_-]$",
"url": "https://{}.jimdosite.com",
"urlMain": "https://jimdosite.com/",
@ -1386,8 +1384,7 @@
"url": "https://monkeytype.com/profile/{}",
"urlMain": "https://monkeytype.com/",
"urlProbe": "https://api.monkeytype.com/users/{}/profile",
"username_claimed": "Lost_Arrow",
"username_unclaimed": "noonewouldeverusethis7"
"username_claimed": "Lost_Arrow"
},
"Motherless": {
"errorMsg": "no longer a member",
@ -2327,7 +2324,7 @@
"username_claimed": "blue"
},
"YandexMusic": {
"::::README::::": "The first and third errorMsg relate to geo-restrictions and bot detection/captchas.",
"__comment__": "The first and third errorMsg relate to geo-restrictions and bot detection/captchas.",
"errorMsg": [
"\u041e\u0448\u0438\u0431\u043a\u0430 404",
"<meta name=\"description\" content=\"\u041e\u0442\u043a\u0440\u044b\u0432\u0430\u0439\u0442\u0435 \u043d\u043e\u0432\u0443\u044e \u043c\u0443\u0437\u044b\u043a\u0443 \u043a\u0430\u0436\u0434\u044b\u0439 \u0434\u0435\u043d\u044c.",

View file

@ -0,0 +1,80 @@
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Sherlock Targets",
  "description": "Social media target to probe for existence of usernames",
  "type": "object",
  "properties": {
    "$schema": { "type": "string" }
  },
  "patternProperties": {
    "^(?!\\$).*?$": {
      "type": "object",
      "description": "User-friendly target name",
      "required": [ "url", "urlMain", "errorType", "username_claimed" ],
      "properties": {
        "url": { "type": "string" },
        "urlMain": { "type": "string" },
        "urlProbe": { "type": "string" },
        "username_claimed": { "type": "string" },
        "regexCheck": { "type": "string" },
        "isNSFW": { "type": "boolean" },
        "headers": { "type": "object" },
        "request_payload": { "type": "object" },
        "__comment__": {
          "type": "string",
          "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
        },
        "tags": {
          "oneOf": [
            { "$ref": "#/$defs/tag" },
            { "type": "array", "items": { "$ref": "#/$defs/tag" } }
          ]
        },
        "request_method": {
          "type": "string",
          "enum": [ "GET", "POST", "HEAD", "PUT" ]
        },
        "errorType": {
          "type": "string",
          "enum": [ "message", "response_url", "status_code" ]
        },
        "errorMsg": {
          "oneOf": [
            { "type": "string" },
            { "type": "array", "items": { "type": "string" } }
          ]
        },
        "errorCode": {
          "oneOf": [
            { "type": "integer" },
            { "type": "array", "items": { "type": "integer" } }
          ]
        },
        "errorUrl": { "type": "string" },
        "response_url": { "type": "string" }
      },
      "dependentSchemas": {
        "errorMsg": {
          "properties": { "errorType": { "const": "message" } }
        },
        "errorUrl": {
          "properties": { "errorType": { "const": "response_url" } }
        },
        "errorCode": {
          "properties": { "errorType": { "const": "status_code" } }
        }
      },
      "if": { "properties": { "errorType": { "const": "message" } } },
      "then": { "required": [ "errorMsg" ] },
      "else": {
        "if": { "properties": { "errorType": { "const": "response_url" } } },
        "then": { "required": [ "errorUrl" ] }
      },
      "additionalProperties": false
    }
  },
  "additionalProperties": false,
  "$defs": {
    "tag": { "type": "string", "enum": [ "adult", "gaming" ] }
  }
}

View file

@ -152,6 +152,11 @@ class SitesInformation:
raise FileNotFoundError(f"Problem while attempting to access "
f"data file '{data_file_path}'."
)
# "$schema" is manifest metadata rather than a site entry; drop it if present.
# pop() with a default tolerates a missing key without a bare `except:`,
# which would also swallow unrelated errors such as KeyboardInterrupt.
site_data.pop('$schema', None)
self.sites = {}

View file

@ -5,10 +5,14 @@ import json
# Read the data.json file
with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file:
data = json.load(data_file)
data: dict = json.load(data_file)
# Work on a shallow copy so `data` itself is left untouched, then drop the
# "$schema" metadata key. The default makes this a no-op (instead of a
# KeyError) when the manifest carries no "$schema" entry.
social_networks: dict = dict(data)
social_networks.pop('$schema', None)
# Sort the social networks in alphanumeric order
social_networks = sorted(data.items())
social_networks: list = sorted(social_networks.items())
# Write the list of supported sites to sites.md
with open("sites.md", "w") as site_file: