Merge pull request #2099 from ppfeister/feature/schema

Set schema for manifest
2024-11-22 20:03:03 +00:00 · 2024-05-07 19:25:59 +00:00 · 2024-05-07 19:25:59 +00:00 · d0c8282e5e
commit d0c8282e5e
parent c9394451fb d118c1c43c
4 changed files with 97 additions and 11 deletions
--- a/sherlock/resources/data.json
+++ b/sherlock/resources/data.json
@ -1,4 +1,5 @@
 {
+  "$schema": "data.schema.json",
  "1337x": {
    "errorMsg": [
      "<title>Error something went wrong.</title>",
@ -498,8 +499,7 @@
    "url": "https://codeforces.com/profile/{}",
    "urlMain": "https://codeforces.com/",
    "urlProbe": "https://codeforces.com/api/user.info?handles={}",
-    "username_claimed": "tourist",
+    "username_claimed": "tourist"
-    "username_unclaimed": "noonewouldeverusethis7"
  },
  "Codepen": {
    "errorType": "status_code",
@ -996,7 +996,7 @@
    "username_claimed": "naveennamani877"
  },
  "HackerNews": {
-    "::::README::::": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.",
+    "__comment__": "First errMsg invalid, second errMsg rate limited. Not ideal. Adjust for better rate limit filtering.",
    "errorMsg": [
      "No such user.",
      "Sorry."
@ -1098,8 +1098,7 @@
    "errorUrl": "https://irc-galleria.net/users/search?username={}",
    "url": "https://irc-galleria.net/user/{}",
    "urlMain": "https://irc-galleria.net/",
-    "username_claimed": "appas",
+    "username_claimed": "appas"
-    "username_unclaimed": "noonewouldeverusethis77"
  },
  "Icons8 Community": {
    "errorType": "status_code",
@ -1186,7 +1185,6 @@
  },
  "Jimdo": {
    "errorType": "status_code",
-    "noPeriod": "True",
    "regexCheck": "^[a-zA-Z0-9@_-]$",
    "url": "https://{}.jimdosite.com",
    "urlMain": "https://jimdosite.com/",
@ -1386,8 +1384,7 @@
    "url": "https://monkeytype.com/profile/{}",
    "urlMain": "https://monkeytype.com/",
    "urlProbe": "https://api.monkeytype.com/users/{}/profile",
-    "username_claimed": "Lost_Arrow",
+    "username_claimed": "Lost_Arrow"
-    "username_unclaimed": "noonewouldeverusethis7"
  },
  "Motherless": {
    "errorMsg": "no longer a member",
@ -2327,7 +2324,7 @@
    "username_claimed": "blue"
  },
  "YandexMusic": {
-    "::::README::::": "The first and third errorMsg relate to geo-restrictions and bot detection/captchas.",
+    "__comment__": "The first and third errorMsg relate to geo-restrictions and bot detection/captchas.",
    "errorMsg": [
      "\u041e\u0448\u0438\u0431\u043a\u0430 404",
      "<meta name=\"description\" content=\"\u041e\u0442\u043a\u0440\u044b\u0432\u0430\u0439\u0442\u0435 \u043d\u043e\u0432\u0443\u044e \u043c\u0443\u0437\u044b\u043a\u0443 \u043a\u0430\u0436\u0434\u044b\u0439 \u0434\u0435\u043d\u044c.",
--- a/sherlock/resources/data.schema.json
+++ b/sherlock/resources/data.schema.json
@ -0,0 +1,80 @@
 {
    "$schema": "https://json-schema.org/draft/2020-12/schema",
    "title": "Sherlock Targets",
    "description": "Social media target to probe for existence of usernames",
    "type": "object",
    "properties": {
        "$schema": { "type": "string" }
    },
    "patternProperties": {
        "^(?!\\$).*?$": {
            "type": "object",
            "description": "User-friendly target name",
            "required": [ "url", "urlMain", "errorType", "username_claimed" ],
            "properties": {
                "url": { "type": "string" },
                "urlMain": { "type": "string" },
                "urlProbe": { "type": "string" },
                "username_claimed": { "type": "string" },
                "regexCheck": { "type": "string" },
                "isNSFW": { "type": "boolean" },
                "headers": { "type": "object" },
                "request_payload": { "type": "object" },
                "__comment__": {
                    "type": "string",
                    "description": "Used to clarify important target information if (and only if) a commit message would not suffice.\nThis key should not be parsed anywhere within Sherlock."
                },
                "tags": {
                    "oneOf": [
                        { "$ref": "#/$defs/tag" },
                        { "type": "array", "items": { "$ref": "#/$defs/tag" } }
                    ]
                },
                "request_method": {
                    "type": "string",
                    "enum": [ "GET", "POST", "HEAD", "PUT" ]
                },
                "errorType": {
                    "type": "string",
                    "enum": [ "message", "response_url", "status_code" ]
                },
                "errorMsg": {
                    "oneOf": [
                        { "type": "string" },
                        { "type": "array", "items": { "type": "string" } }
                    ]
                },
                "errorCode": {
                    "oneOf": [
                        { "type": "integer" },
                        { "type": "array", "items": { "type": "integer" } }
                    ]
                },
                "errorUrl": { "type": "string" },
                "response_url": { "type": "string" }
            },
            "dependentSchemas": {
                "errorMsg": {
                    "properties" : { "errorType": { "const": "message" } }
                },
                "errorUrl": {
                    "properties": { "errorType": { "const": "response_url" } }
                },
                "errorCode": {
                    "properties": { "errorType": { "const": "status_code" } }
                }
            },
            "if": { "properties": { "errorType": { "const": "message" } } },
            "then": { "required": [ "errorMsg" ] },
            "else": {
                "if": { "properties": { "errorType": { "const": "response_url" } } },
                "then": { "required": [ "errorUrl" ] }
            },
            "additionalProperties": false
        }
    },
    "additionalProperties": false,
    "$defs": {
        "tag": { "type": "string", "enum": [ "adult", "gaming" ] }
    }
 }
--- a/sherlock/sites.py
+++ b/sherlock/sites.py
@ -153,6 +153,11 @@ class SitesInformation:
                                        f"data file '{data_file_path}'."
                                        )
        # Drop the "$schema" key (it points editors at the JSON schema) before
        # the remaining entries are processed as sites. The default makes this
        # a no-op for manifests without the key, so no blanket exception
        # handler is needed.
        site_data.pop('$schema', None)
        self.sites = {}
        # Add all site information from the json file to internal site list.
--- a/site_list.py
+++ b/site_list.py
@ -5,10 +5,14 @@ import json
 # Read the data.json file
 with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file:
-    data = json.load(data_file)
+    data: dict = json.load(data_file)
 # Removes schema-specific keywords for proper processing
 social_networks: dict = dict(data)
 social_networks.pop('$schema')
 # Sort the social networks in alphanumeric order
-social_networks = sorted(data.items())
+social_networks: list = sorted(social_networks.items())
 # Write the list of supported sites to sites.md
 with open("sites.md", "w") as site_file: