Merge pull request #2099 from ppfeister/feature/schema

Set schema for manifest
This commit is contained in:
Siddharth Dushantha 2024-05-07 19:25:59 +00:00 committed by GitHub
commit d0c8282e5e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 97 additions and 11 deletions

View file

@ -1,4 +1,5 @@
{ {
"$schema": "data.schema.json",
"1337x": { "1337x": {
"errorMsg": [ "errorMsg": [
"<title>Error something went wrong.</title>", "<title>Error something went wrong.</title>",
@ -498,8 +499,7 @@
"url": "https://codeforces.com/profile/{}", "url": "https://codeforces.com/profile/{}",
"urlMain": "https://codeforces.com/", "urlMain": "https://codeforces.com/",
"urlProbe": "https://codeforces.com/api/user.info?handles={}", "urlProbe": "https://codeforces.com/api/user.info?handles={}",
"username_claimed": "tourist", "username_claimed": "tourist"
"username_unclaimed": "noonewouldeverusethis7"
}, },
"Codepen": { "Codepen": {
"errorType": "status_code", "errorType": "status_code",
@ -996,7 +996,7 @@
"username_claimed": "naveennamani877" "username_claimed": "naveennamani877"
}, },
"HackerNews": { "HackerNews": {
"::::README::::": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.", "__comment__": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.",
"errorMsg": [ "errorMsg": [
"No such user.", "No such user.",
"Sorry." "Sorry."
@ -1098,8 +1098,7 @@
"errorUrl": "https://irc-galleria.net/users/search?username={}", "errorUrl": "https://irc-galleria.net/users/search?username={}",
"url": "https://irc-galleria.net/user/{}", "url": "https://irc-galleria.net/user/{}",
"urlMain": "https://irc-galleria.net/", "urlMain": "https://irc-galleria.net/",
"username_claimed": "appas", "username_claimed": "appas"
"username_unclaimed": "noonewouldeverusethis77"
}, },
"Icons8 Community": { "Icons8 Community": {
"errorType": "status_code", "errorType": "status_code",
@ -1186,7 +1185,6 @@
}, },
"Jimdo": { "Jimdo": {
"errorType": "status_code", "errorType": "status_code",
"noPeriod": "True",
"regexCheck": "^[a-zA-Z0-9@_-]$", "regexCheck": "^[a-zA-Z0-9@_-]$",
"url": "https://{}.jimdosite.com", "url": "https://{}.jimdosite.com",
"urlMain": "https://jimdosite.com/", "urlMain": "https://jimdosite.com/",
@ -1386,8 +1384,7 @@
"url": "https://monkeytype.com/profile/{}", "url": "https://monkeytype.com/profile/{}",
"urlMain": "https://monkeytype.com/", "urlMain": "https://monkeytype.com/",
"urlProbe": "https://api.monkeytype.com/users/{}/profile", "urlProbe": "https://api.monkeytype.com/users/{}/profile",
"username_claimed": "Lost_Arrow", "username_claimed": "Lost_Arrow"
"username_unclaimed": "noonewouldeverusethis7"
}, },
"Motherless": { "Motherless": {
"errorMsg": "no longer a member", "errorMsg": "no longer a member",
@ -2327,7 +2324,7 @@
"username_claimed": "blue" "username_claimed": "blue"
}, },
"YandexMusic": { "YandexMusic": {
"::::README::::": "The first and third errorMsg relate to geo-restrictions and bot detection/captchas.", "__comment__": "The first and third errorMsg relate to geo-restrictions and bot detection/captchas.",
"errorMsg": [ "errorMsg": [
"\u041e\u0448\u0438\u0431\u043a\u0430 404", "\u041e\u0448\u0438\u0431\u043a\u0430 404",
"<meta name=\"description\" content=\"\u041e\u0442\u043a\u0440\u044b\u0432\u0430\u0439\u0442\u0435 \u043d\u043e\u0432\u0443\u044e \u043c\u0443\u0437\u044b\u043a\u0443 \u043a\u0430\u0436\u0434\u044b\u0439 \u0434\u0435\u043d\u044c.", "<meta name=\"description\" content=\"\u041e\u0442\u043a\u0440\u044b\u0432\u0430\u0439\u0442\u0435 \u043d\u043e\u0432\u0443\u044e \u043c\u0443\u0437\u044b\u043a\u0443 \u043a\u0430\u0436\u0434\u044b\u0439 \u0434\u0435\u043d\u044c.",

View file

@ -0,0 +1,80 @@
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Sherlock Targets",
  "description": "Social media target to probe for existence of usernames",
  "type": "object",
  "properties": {
    "$schema": { "type": "string" }
  },
  "patternProperties": {
    "^(?!\\$).*?$": {
      "type": "object",
      "description": "User-friendly target name",
      "$comment": "errorMsg, errorUrl and errorCode may only appear together with the matching errorType (see dependentSchemas); errorMsg and errorUrl are mandatory for the 'message' and 'response_url' error types respectively (see if/then/else).",
      "required": [ "url", "urlMain", "errorType", "username_claimed" ],
      "properties": {
        "url": { "type": "string" },
        "urlMain": { "type": "string" },
        "urlProbe": { "type": "string" },
        "username_claimed": { "type": "string" },
        "regexCheck": { "type": "string" },
        "isNSFW": { "type": "boolean" },
        "headers": { "type": "object" },
        "request_payload": { "type": "object" },
        "__comment__": {
          "type": "string",
          "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
        },
        "tags": {
          "oneOf": [
            { "$ref": "#/$defs/tag" },
            { "type": "array", "items": { "$ref": "#/$defs/tag" } }
          ]
        },
        "request_method": {
          "type": "string",
          "enum": [ "GET", "POST", "HEAD", "PUT" ]
        },
        "errorType": {
          "type": "string",
          "enum": [ "message", "response_url", "status_code" ]
        },
        "errorMsg": {
          "oneOf": [
            { "type": "string" },
            { "type": "array", "items": { "type": "string" } }
          ]
        },
        "errorCode": {
          "oneOf": [
            { "type": "integer" },
            { "type": "array", "items": { "type": "integer" } }
          ]
        },
        "errorUrl": { "type": "string" },
        "response_url": { "type": "string" }
      },
      "dependentSchemas": {
        "errorMsg": {
          "properties": { "errorType": { "const": "message" } }
        },
        "errorUrl": {
          "properties": { "errorType": { "const": "response_url" } }
        },
        "errorCode": {
          "properties": { "errorType": { "const": "status_code" } }
        }
      },
      "if": { "properties": { "errorType": { "const": "message" } } },
      "then": { "required": [ "errorMsg" ] },
      "else": {
        "if": { "properties": { "errorType": { "const": "response_url" } } },
        "then": { "required": [ "errorUrl" ] }
      },
      "additionalProperties": false
    }
  },
  "additionalProperties": false,
  "$defs": {
    "tag": { "type": "string", "enum": [ "adult", "gaming" ] }
  }
}

View file

@ -153,6 +153,11 @@ class SitesInformation:
f"data file '{data_file_path}'." f"data file '{data_file_path}'."
) )
try:
site_data.pop('$schema')
except:
pass
self.sites = {} self.sites = {}
# Add all site information from the json file to internal site list. # Add all site information from the json file to internal site list.

View file

@ -5,10 +5,14 @@ import json
# Read the data.json file # Read the data.json file
with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file: with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file:
data = json.load(data_file) data: dict = json.load(data_file)
# Removes schema-specific keywords for proper processing
social_networks: dict = dict(data)
social_networks.pop('$schema')
# Sort the social networks in alphanumeric order # Sort the social networks in alphanumeric order
social_networks = sorted(data.items()) social_networks: list = sorted(social_networks.items())
# Write the list of supported sites to sites.md # Write the list of supported sites to sites.md
with open("sites.md", "w") as site_file: with open("sites.md", "w") as site_file: