From d17268e737bf41e733dfe2e0abeb5187e47eb959 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 26 Oct 2019 22:42:03 -0500 Subject: [PATCH 01/36] Move content into sherlock sub-directory inside of repo. This directory will look very much like what the packaged version of Sherlock will look like when it is installed in the site-packages area. No real restructuring of the code has happened. This just gives a view of the directory structure. --- sherlock/__init__.py | 5 +++++ sherlock/__main__.py | 14 ++++++++++++++ load_proxies.py => sherlock/load_proxies.py | 0 data.json => sherlock/resources/data.json | 0 sherlock.py => sherlock/sherlock.py | 2 +- {tests => sherlock/tests}/__init__.py | 0 {tests => sherlock/tests}/all.py | 0 {tests => sherlock/tests}/base.py | 2 +- site_list.py | 2 +- 9 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 sherlock/__init__.py create mode 100644 sherlock/__main__.py rename load_proxies.py => sherlock/load_proxies.py (100%) rename data.json => sherlock/resources/data.json (100%) rename sherlock.py => sherlock/sherlock.py (99%) mode change 100755 => 100644 rename {tests => sherlock/tests}/__init__.py (100%) rename {tests => sherlock/tests}/all.py (100%) rename {tests => sherlock/tests}/base.py (99%) diff --git a/sherlock/__init__.py b/sherlock/__init__.py new file mode 100644 index 0000000..b0894c8 --- /dev/null +++ b/sherlock/__init__.py @@ -0,0 +1,5 @@ +"""Sherlock Module + +This module contains the main logic to search for usernames at social +networks. +""" diff --git a/sherlock/__main__.py b/sherlock/__main__.py new file mode 100644 index 0000000..8c2b2e7 --- /dev/null +++ b/sherlock/__main__.py @@ -0,0 +1,14 @@ +#! /usr/bin/env python3 + +""" +Sherlock: Find Usernames Across Social Networks Module + +This module contains the main logic to search for usernames at social +networks. 
+""" + +import sherlock + + +if __name__ == "__main__": + sherlock.main() diff --git a/load_proxies.py b/sherlock/load_proxies.py similarity index 100% rename from load_proxies.py rename to sherlock/load_proxies.py diff --git a/data.json b/sherlock/resources/data.json similarity index 100% rename from data.json rename to sherlock/resources/data.json diff --git a/sherlock.py b/sherlock/sherlock.py old mode 100755 new mode 100644 similarity index 99% rename from sherlock.py rename to sherlock/sherlock.py index 8797ccc..44adfcd --- a/sherlock.py +++ b/sherlock/sherlock.py @@ -405,7 +405,7 @@ def main(): help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080" ) parser.add_argument("--json", "-j", metavar="JSON_FILE", - dest="json_file", default="data.json", + dest="json_file", default="resources/data.json", help="Load data from a JSON file or an online, valid, JSON file.") parser.add_argument("--proxy_list", "-pl", metavar='PROXY_LIST', action="store", dest="proxy_list", default=None, diff --git a/tests/__init__.py b/sherlock/tests/__init__.py similarity index 100% rename from tests/__init__.py rename to sherlock/tests/__init__.py diff --git a/tests/all.py b/sherlock/tests/all.py similarity index 100% rename from tests/all.py rename to sherlock/tests/all.py diff --git a/tests/base.py b/sherlock/tests/base.py similarity index 99% rename from tests/base.py rename to sherlock/tests/base.py index ff4c541..5e8ff5a 100644 --- a/tests/base.py +++ b/sherlock/tests/base.py @@ -28,7 +28,7 @@ class SherlockBaseTest(unittest.TestCase): warnings.simplefilter("ignore", ResourceWarning) # Load the data file with all site information. 
- data_file_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "data.json") + data_file_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "resources/data.json") with open(data_file_path, "r", encoding="utf-8") as raw: self.site_data_all = json.load(raw) diff --git a/site_list.py b/site_list.py index 3cc19a3..d1bd22d 100644 --- a/site_list.py +++ b/site_list.py @@ -36,7 +36,7 @@ parser.add_argument("--rank","-r", ) args = parser.parse_args() -with open("data.json", "r", encoding="utf-8") as data_file: +with open("sherlock/resources/data.json", "r", encoding="utf-8") as data_file: data = json.load(data_file) with open("sites.md", "w") as site_file: From 38b76486ddff2fe18e88c9c115da3eed0347b16d Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sun, 10 Nov 2019 10:23:06 -0600 Subject: [PATCH 02/36] Update readme. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 905462b..e28e850 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ $ python3 -m pip install -r requirements.txt ## Usage ```bash -$ python3 sherlock.py --help +$ python3 -m sherlock --help usage: sherlock.py [-h] [--version] [--verbose] [--rank] [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--tor] [--unique-tor] [--csv] [--site SITE_NAME] From 578248bf8eb209217675f187a00db5528cae46fd Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 28 Nov 2019 18:50:53 -0600 Subject: [PATCH 03/36] Do not create session and request objects if we are going to end up creating new ones for Tor requests. This just wastes time. 
--- sherlock/sherlock.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 1464939..46be623 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -168,11 +168,14 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr executor = ThreadPoolExecutor(max_workers=len(site_data)) # Create session based on request methodology - underlying_session = requests.session() - underlying_request = requests.Request() if tor or unique_tor: + #Requests using Tor obfuscation underlying_request = TorRequest() underlying_session = underlying_request.session + else: + #Normal requests + underlying_session = requests.session() + underlying_request = requests.Request() # Create multi-threaded session for all requests. Use our custom FuturesSession that exposes response time session = ElapsedFuturesSession( From d6d6b83b0658c5caa9b7a4670be0d3d6235c2dad Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 28 Nov 2019 18:56:26 -0600 Subject: [PATCH 04/36] Add documentation to extension to FuturesSession() which we use to get timing information. Fix problem where timing hook would not be installed properly if the hooks for the request were already filled out with a tuple. I am not sure if that is even possible, but if it does happen, then I just convert the tuple to a list and go on from there. --- sherlock/sherlock.py | 63 +++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 46be623..7a2ca67 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -34,30 +34,56 @@ global proxy_list proxy_list = [] -class ElapsedFuturesSession(FuturesSession): - """ - Extends FutureSession to add a response time metric to each request. 
- - This is taken (almost) directly from here: https://github.com/ross/requests-futures#working-in-the-background - """ - +class SherlockFuturesSession(FuturesSession): def request(self, method, url, hooks={}, *args, **kwargs): + """Request URL. + + This extends the FuturesSession request method to calculate a response + time metric to each request. + + It is taken (almost) directly from the following StackOverflow answer: + https://github.com/ross/requests-futures#working-in-the-background + + Keyword Arguments: + self -- This object. + method -- String containing method desired for request. + url -- String containing URL for request. + hooks -- Dictionary containing hooks to execute after + request finishes. + args -- Arguments. + kwargs -- Keyword arguments. + + Return Value: + Request object. + """ start = time() - def timing(r, *args, **kwargs): + def response_time(resp, *args, **kwargs): elapsed_sec = time() - start - r.elapsed = round(elapsed_sec * 1000) + resp.elapsed = round(elapsed_sec * 1000) + #Install hook to execute when response completes. + #Make sure that the time measurement hook is first, so we will not + #track any later hook's execution time. try: - if isinstance(hooks['response'], (list, tuple)): - # needs to be first so we don't time other hooks execution - hooks['response'].insert(0, timing) + if isinstance(hooks['response'], list): + hooks['response'].insert(0, response_time) + elif isinstance(hooks['response'], tuple): + #Convert tuple to list and insert time measurement hook first. + hooks['response'] = list(hooks['response']) + hooks['response'].insert(0, response_time) else: - hooks['response'] = [timing, hooks['response']] + #Must have previously contained a single hook function, + #so convert to list. + hooks['response'] = [response_time, hooks['response']] except KeyError: - hooks['response'] = timing + #No response hook was already defined, so install it ourselves. 
+ hooks['response'] = [response_time] - return super(ElapsedFuturesSession, self).request(method, url, hooks=hooks, *args, **kwargs) + return super(SherlockFuturesSession, self).request(method, + url, + hooks=hooks, + *args, **kwargs) def print_info(title, info): @@ -177,9 +203,10 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr underlying_session = requests.session() underlying_request = requests.Request() - # Create multi-threaded session for all requests. Use our custom FuturesSession that exposes response time - session = ElapsedFuturesSession( - executor=executor, session=underlying_session) + #Create multi-threaded session for all requests. + session = SherlockFuturesSession(executor=executor, + session=underlying_session) + # Results from analysis of all sites results_total = {} From b02ef5e6e26ff01401cd27d2456e8a524a3cbe7c Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 28 Nov 2019 19:00:23 -0600 Subject: [PATCH 05/36] Use monotonic() time function. This method is guaranteed not to do flaky things during leap seconds or daylight saving time jumps. --- sherlock/sherlock.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 7a2ca67..e824cd1 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -16,7 +16,7 @@ import sys import random from argparse import ArgumentParser, RawDescriptionHelpFormatter from concurrent.futures import ThreadPoolExecutor -from time import time +from time import monotonic import requests from colorama import Fore, Style, init @@ -56,10 +56,10 @@ class SherlockFuturesSession(FuturesSession): Return Value: Request object. """ - start = time() + start = monotonic() def response_time(resp, *args, **kwargs): - elapsed_sec = time() - start + elapsed_sec = monotonic() - start resp.elapsed = round(elapsed_sec * 1000) #Install hook to execute when response completes. 
From d8c8cd8e7eea7610179d2fe02c8e6bcc3c3c52b7 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 28 Nov 2019 19:16:23 -0600 Subject: [PATCH 06/36] Leave the elapsed time for the request in seconds. Move format of time into ms to the presentation layer. --- sherlock/sherlock.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index e824cd1..ed64c16 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -59,8 +59,19 @@ class SherlockFuturesSession(FuturesSession): start = monotonic() def response_time(resp, *args, **kwargs): - elapsed_sec = monotonic() - start - resp.elapsed = round(elapsed_sec * 1000) + """Response Time Hook. + + Keyword Arguments: + resp -- Response object. + args -- Arguments. + kwargs -- Keyword arguments. + + Return Value: + N/A + """ + resp.elapsed = monotonic() - start + + return #Install hook to execute when response completes. #Make sure that the time measurement hook is first, so we will not @@ -102,7 +113,7 @@ def print_error(err, errstr, var, verbose=False): def format_response_time(response_time, verbose): - return " [{} ms]".format(response_time) if verbose else "" + return f" [{round(response_time * 1000)} ms]" if verbose else "" def print_found(social_network, url, response_time, verbose=False): @@ -239,7 +250,7 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr results_site["url_user"] = "" results_site['http_status'] = "" results_site['response_text'] = "" - results_site['response_time_ms'] = "" + results_site['response_time_s'] = "" else: # URL of user on site (if it exists) url = net_info["url"].format(username) @@ -380,7 +391,7 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr # Save results from request results_site['http_status'] = http_status results_site['response_text'] = response_text - results_site['response_time_ms'] = response_time + 
results_site['response_time_s'] = response_time # Add this site's results into final dictionary with all of the other results. results_total[social_network] = results_site @@ -621,7 +632,7 @@ def main(): 'url_user', 'exists', 'http_status', - 'response_time_ms' + 'response_time_s' ] ) for site in results: @@ -631,7 +642,7 @@ def main(): results[site]['url_user'], results[site]['exists'], results[site]['http_status'], - results[site]['response_time_ms'] + results[site]['response_time_s'] ] ) From ecf097e00d60d3c5ddbf36505adc3453acc0d8db Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Fri, 29 Nov 2019 08:37:02 -0600 Subject: [PATCH 07/36] Get rid of global variable amount. This is no longer used. --- sherlock/sherlock.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 06f617e..a1d7ba3 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -27,7 +27,6 @@ from load_proxies import load_proxies_from_csv, check_proxy_list module_name = "Sherlock: Find Usernames Across Social Networks" __version__ = "0.9.7" -amount = 0 global proxy_list @@ -197,8 +196,6 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr response_text: Text that came back from request. May be None if there was an HTTP error when checking for existence. 
""" - global amount - print_info("Checking username", username) # Allow 1 thread for each external service, so `len(site_data)` threads total @@ -349,7 +346,6 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr if not error in r.text: print_found(social_network, url, response_time, verbose) exists = "yes" - amount = amount+1 else: if not print_found_only: print_not_found(social_network, response_time, verbose) @@ -360,7 +356,6 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr if not r.status_code >= 300 or r.status_code < 200: print_found(social_network, url, response_time, verbose) exists = "yes" - amount = amount+1 else: if not print_found_only: print_not_found(social_network, response_time, verbose) @@ -376,7 +371,6 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr # print_found(social_network, url, response_time, verbose) exists = "yes" - amount = amount+1 else: if not print_found_only: print_not_found(social_network, response_time, verbose) From 232a5f7d07e0b3712199fcfe78a787d2c87dd121 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Fri, 29 Nov 2019 08:39:18 -0600 Subject: [PATCH 08/36] Clarify meaning of "Total Websites" count. It is really "Total Websites Username Detected On". --- sherlock/sherlock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index a1d7ba3..3ec7de9 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -616,7 +616,7 @@ def main(): if dictionary.get("exists") == "yes": exists_counter += 1 file.write(dictionary["url_user"] + "\n") - file.write("Total Websites : {}".format(exists_counter)) + file.write(f"Total Websites Username Detected On : {exists_counter}") file.close() if args.csv == True: From 2268f8186c00ae60eff071a668cf1f195d7ee96b Mon Sep 17 00:00:00 2001 From: "Christopher K. 
Hoadley" Date: Fri, 29 Nov 2019 08:40:02 -0600 Subject: [PATCH 09/36] Remove unneeded assignment of results before main Sherlock call. --- sherlock/sherlock.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 3ec7de9..c451a8a 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -606,9 +606,13 @@ def main(): except (NameError, IndexError): proxy = args.proxy - results = {} - results = sherlock(username, site_data, verbose=args.verbose, - tor=args.tor, unique_tor=args.unique_tor, proxy=args.proxy, print_found_only=args.print_found_only) + results = sherlock(username, + site_data, + verbose=args.verbose, + tor=args.tor, + unique_tor=args.unique_tor, + proxy=args.proxy, + print_found_only=args.print_found_only) exists_counter = 0 for website_name in results: From 571f27f9a5ed68a3fcd928729747103dda02dab3 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Fri, 29 Nov 2019 14:15:32 -0600 Subject: [PATCH 10/36] Clean up code that only reads the headers if we are doing HTTP Status detection. Remove special check for GitHub: everything works fine without it. --- sherlock/sherlock.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index c451a8a..7146721 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -263,11 +263,11 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, pr # from where the user profile normally can be found. 
url_probe = url_probe.format(username) - request_method = session.get - if social_network != "GitHub": - # If only the status_code is needed don't download the body - if net_info["errorType"] == 'status_code': - request_method = session.head + #If only the status_code is needed don't download the body + if net_info["errorType"] == 'status_code': + request_method = session.head + else: + request_method = session.get if net_info["errorType"] == "response_url": # Site forwards request to a different URL if username not From 6114ca263d520605fe90988155e2508913fbf8d6 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 26 Dec 2019 10:59:52 -0600 Subject: [PATCH 11/36] Remove Proxy List Support While doing the restructuring, I am testing in more depth as I change the code. And, I am trying to grok how the proxy options work. Specifically, how the proxy list works. Or, does not work. There is code in the main function that randomly selects proxies from a list, but it does not actually use the result. This was noticed in #292. It looks like the only place where the proxy list is used is when there is a proxy error during get_response()...in that case a new random proxy is chosen. But, there is no care taken to ensure that we do not get the same proxy that just errored out. It seems like problematic proxies should be blacklisted if there is that type of failure. Moreover, there is a check earlier in the code that does not allow the proxy list and proxy command line option to be used simultaneously. So, I can see no way that the proxy list has any functionality: if you do define the proxy list, then there is no way to kick off the general request with a proxy. I also noticed that the recursive get_response() call does not pass its return tuples back up the call chain. The existing code would never get any good from the switchover to an alternate proxy (even if the other problems mentioned above were resolved). For now, I am removing the support. 
This feature may be looked at after the restructuring is done. --- sherlock/load_proxies.py | 89 ---------------------------------------- sherlock/sherlock.py | 69 ++----------------------------- 2 files changed, 4 insertions(+), 154 deletions(-) delete mode 100644 sherlock/load_proxies.py diff --git a/sherlock/load_proxies.py b/sherlock/load_proxies.py deleted file mode 100644 index d8a0a91..0000000 --- a/sherlock/load_proxies.py +++ /dev/null @@ -1,89 +0,0 @@ -import csv -import requests -import time -from collections import namedtuple -from colorama import Fore, Style - - -def load_proxies_from_csv(path_to_list): - """ - A function which loads proxies from a .csv file, to a list. - - Inputs: path to .csv file which contains proxies, described by fields: 'ip', 'port', 'protocol'. - - Outputs: list containing proxies stored in named tuples. - """ - Proxy = namedtuple('Proxy', ['ip', 'port', 'protocol']) - - with open(path_to_list, 'r') as csv_file: - csv_reader = csv.DictReader(csv_file) - proxies = [Proxy(line['ip'],line['port'],line['protocol']) for line in csv_reader] - - return proxies - - -def check_proxy(proxy_ip, proxy_port, protocol): - """ - A function which test the proxy by attempting - to make a request to the designated website. - - We use 'wikipedia.org' as a test, since we can test the proxy anonymity - by check if the returning 'X-Client-IP' header matches the proxy ip. - """ - full_proxy = f'{protocol}://{proxy_ip}:{proxy_port}' - proxies = {'http': full_proxy, 'https': full_proxy} - try: - r = requests.get('https://www.wikipedia.org',proxies=proxies, timeout=4) - return_proxy = r.headers['X-Client-IP'] - if proxy_ip==return_proxy: - return True - else: - return False - except Exception: - return False - - -def check_proxy_list(proxy_list, max_proxies=None): - """ - A function which takes in one mandatory argument -> a proxy list in - the format returned by the function 'load_proxies_from_csv'. 
- - It also takes an optional argument 'max_proxies', if the user wishes to - cap the number of validated proxies. - - Each proxy is tested by the check_proxy function. Since each test is done on - 'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules, - but are sending successive requests each separated by at least 1 sec. - - Outputs: list containing proxies stored in named tuples. - """ - print((Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + "] Started checking proxies.")) - working_proxies = [] - - # If the user has limited the number of proxies we need, - # the function will stop when the working_proxies - # loads the max number of requested proxies. - if max_proxies != None: - for proxy in proxy_list: - if len(working_proxies) < max_proxies: - time.sleep(1) - if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True: - working_proxies.append(proxy) - else: - break - else: - for proxy in proxy_list: - time.sleep(1) - if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True: - working_proxies.append(proxy) - - if len(working_proxies) > 0: - print((Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + "] Finished checking proxies.")) - return working_proxies - - else: - raise Exception("Found no working proxies.") diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index fe6d2cc..bf00a3f 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -23,15 +23,11 @@ from colorama import Fore, Style, init from requests_futures.sessions import FuturesSession from torrequest import TorRequest -from load_proxies import load_proxies_from_csv, check_proxy_list module_name = "Sherlock: Find Usernames Across Social Networks" __version__ = "0.10.0" -global proxy_list - -proxy_list = [] class SherlockFuturesSession(FuturesSession): def request(self, method, url, hooks={}, *args, **kwargs): @@ -154,9 +150,8 @@ def print_invalid(social_network, msg, color=True): print(f"[-] 
{social_network} {msg}") -def get_response(request_future, error_type, social_network, verbose=False, retry_no=None, color=True): +def get_response(request_future, error_type, social_network, verbose=False, color=True): - global proxy_list try: rsp = request_future.result() @@ -164,18 +159,8 @@ def get_response(request_future, error_type, social_network, verbose=False, retr return rsp, error_type, rsp.elapsed except requests.exceptions.HTTPError as errh: print_error(errh, "HTTP Error:", social_network, verbose, color) - - # In case our proxy fails, we retry with another proxy. except requests.exceptions.ProxyError as errp: - if retry_no>0 and len(proxy_list)>0: - #Selecting the new proxy. - new_proxy = random.choice(proxy_list) - new_proxy = f'{new_proxy.protocol}://{new_proxy.ip}:{new_proxy.port}' - print(f'Retrying with {new_proxy}') - request_future.proxy = {'http':new_proxy,'https':new_proxy} - get_response(request_future,error_type, social_network, verbose,retry_no=retry_no-1, color=color) - else: - print_error(errp, "Proxy error:", social_network, verbose, color) + print_error(errp, "Proxy error:", social_network, verbose, color) except requests.exceptions.ConnectionError as errc: print_error(errc, "Error Connecting:", social_network, verbose, color) except requests.exceptions.Timeout as errt: @@ -353,7 +338,6 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, error_type=error_type, social_network=social_network, verbose=verbose, - retry_no=3, color=color) # Attempt to get request information @@ -495,16 +479,6 @@ def main(): parser.add_argument("--json", "-j", metavar="JSON_FILE", dest="json_file", default="resources/data.json", help="Load data from a JSON file or an online, valid, JSON file.") - parser.add_argument("--proxy_list", "-pl", metavar='PROXY_LIST', - action="store", dest="proxy_list", default=None, - help="Make requests over a proxy randomly chosen from a list generated from a .csv file." 
- ) - parser.add_argument("--check_proxies", "-cp", metavar='CHECK_PROXY', - action="store", dest="check_prox", default=None, - help="To be used with the '--proxy_list' parameter. " - "The script will check if the proxies supplied in the .csv file are working and anonymous." - "Put 0 for no limit on successfully checked proxies, or another number to institute a limit." - ) parser.add_argument("--timeout", action="store", metavar='TIMEOUT', dest="timeout", type=timeout_check, default=None, @@ -532,40 +506,13 @@ def main(): # Argument check # TODO regex check on args.proxy - if args.tor and (args.proxy != None or args.proxy_list != None): - raise Exception("Tor and Proxy cannot be set in the meantime.") - - # Proxy argument check. - # Does not necessarily need to throw an error, - # since we could join the single proxy with the ones generated from the .csv, - # but it seems unnecessarily complex at this time. - if args.proxy != None and args.proxy_list != None: - raise Exception("A single proxy cannot be used along with proxy list.") + if args.tor and (args.proxy != None): + raise Exception("Tor and Proxy cannot be set at the same time.") # Make prompts if args.proxy != None: print("Using the proxy: " + args.proxy) - global proxy_list - - if args.proxy_list != None: - print_info("Loading proxies from", args.proxy_list, not args.color) - - proxy_list = load_proxies_from_csv(args.proxy_list) - - # Checking if proxies should be checked for anonymity. 
- if args.check_prox != None and args.proxy_list != None: - try: - limit = int(args.check_prox) - if limit == 0: - proxy_list = check_proxy_list(proxy_list) - elif limit > 0: - proxy_list = check_proxy_list(proxy_list, limit) - else: - raise ValueError - except ValueError: - raise Exception("Parameter --check_proxies/-cp must be a positive integer.") - if args.tor or args.unique_tor: print("Using Tor to make requests") print("Warning: some websites might refuse connecting over Tor, so note that using this option might increase connection errors.") @@ -661,14 +608,6 @@ def main(): else: file = open(username + ".txt", "w", encoding="utf-8") - # We try to ad a random member of the 'proxy_list' var as the proxy of the request. - # If we can't access the list or it is empty, we proceed with args.proxy as the proxy. - try: - random_proxy = random.choice(proxy_list) - proxy = f'{random_proxy.protocol}://{random_proxy.ip}:{random_proxy.port}' - except (NameError, IndexError): - proxy = args.proxy - results = sherlock(username, site_data, verbose=args.verbose, From 519ac343467ca5b9a148ce48b04b057f4918af51 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 26 Dec 2019 14:00:44 -0600 Subject: [PATCH 12/36] Extract all print statements from function that gets the response. Also, print out social network for error messages. 
--- sherlock/sherlock.py | 55 ++++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index bf00a3f..03785d1 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -102,15 +102,16 @@ def print_info(title, info, color=True): else: print(f"[*] {title} {info} on:") -def print_error(err, errstr, var, verbose=False, color=True): +def print_error(social_network, err, errstr, var, verbose=False, color=True): if color: print(Style.BRIGHT + Fore.WHITE + "[" + Fore.RED + "-" + Fore.WHITE + "]" + + Fore.GREEN + f" {social_network}:" + Fore.RED + f" {errstr}" + Fore.YELLOW + f" {err if verbose else var}") else: - print(f"[-] {errstr} {err if verbose else var}") + print(f"[-] {social_network}: {errstr} {err if verbose else var}") def format_response_time(response_time, verbose): @@ -152,22 +153,33 @@ def print_invalid(social_network, msg, color=True): def get_response(request_future, error_type, social_network, verbose=False, color=True): + #Default for Response object if some failure occurs. 
+ response = None + error_context = "General Unknown Error" + expection_text = None try: - rsp = request_future.result() - if rsp.status_code: - return rsp, error_type, rsp.elapsed + response = request_future.result() + if response.status_code: + #status code exists in response object + error_context = None except requests.exceptions.HTTPError as errh: - print_error(errh, "HTTP Error:", social_network, verbose, color) + error_context = "HTTP Error" + expection_text = str(errh) except requests.exceptions.ProxyError as errp: - print_error(errp, "Proxy error:", social_network, verbose, color) + error_context = "Proxy Error" + expection_text = str(errp) except requests.exceptions.ConnectionError as errc: - print_error(errc, "Error Connecting:", social_network, verbose, color) + error_context = "Error Connecting" + expection_text = str(errc) except requests.exceptions.Timeout as errt: - print_error(errt, "Timeout Error:", social_network, verbose, color) + error_context = "Timeout Error" + expection_text = str(errt) except requests.exceptions.RequestException as err: - print_error(err, "Unknown error:", social_network, verbose, color) - return None, "", -1 + error_context = "Unknown Error" + expection_text = str(err) + + return response, error_context, expection_text def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, @@ -334,11 +346,17 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, # Retrieve future and ensure it has finished future = net_info["request_future"] - r, error_type, response_time = get_response(request_future=future, - error_type=error_type, - social_network=social_network, - verbose=verbose, - color=color) + r, error_text, expection_text = get_response(request_future=future, + error_type=error_type, + social_network=social_network, + verbose=verbose, + color=color) + + #Get response time for response of our request. 
+ try: + response_time = r.elapsed + except AttributeError: + response_time = None # Attempt to get request information try: @@ -350,7 +368,10 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, except: pass - if error_type == "message": + if error_text is not None: + print_error(social_network, expection_text, error_text, "", verbose, color) + exists = "error" + elif error_type == "message": error = net_info.get("errorMsg") # Checks if the error message is in the HTML if not error in r.text: From 2a1ab1c281fbe9abb3bc0593ffc0e1414ebec1db Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Fri, 27 Dec 2019 10:17:10 -0600 Subject: [PATCH 13/36] Add result module to hold results of site queries. The QueryResult() object contains an enumeration for the possible status about a given username on a site, and additional error information that might be handy. Rework all code to use this object instead of the "exists" key in the result dictionary that was used previously. --- sherlock/result.py | 74 ++++++++++++++++++++++++++++++++++++++++++ sherlock/sherlock.py | 46 ++++++++++++-------------- sherlock/tests/base.py | 9 +++-- 3 files changed, 100 insertions(+), 29 deletions(-) create mode 100644 sherlock/result.py diff --git a/sherlock/result.py b/sherlock/result.py new file mode 100644 index 0000000..acbdabd --- /dev/null +++ b/sherlock/result.py @@ -0,0 +1,74 @@ +"""Sherlock Result Module + +This module defines various objects for recording the results of queries. +""" +from enum import Enum + + +class QueryStatus(Enum): + """Query Status Enumeration. + + Describes status of query about a given username. + """ + CLAIMED = "Claimed" #Username Detected + AVAILABLE = "Available" #Username Not Detected + UNKNOWN = "Unknown" #Error Occurred While Trying To Detect Username + ILLEGAL = "Illegal" #Username Not Allowable For This Site + + def __str__(self): + """Convert Object To String. + + Keyword Arguments: + self -- This object. 
+ + Return Value: + Nicely formatted string to get information about this object. + """ + return self.value + +class QueryResult(): + """Query Result Object. + + Describes result of query about a given username. + """ + def __init__(self, status, context=None): + """Create Query Result Object. + + Contains information about a specific method of detecting usernames on + a given type of web sites. + + Keyword Arguments: + self -- This object. + status -- Enumeration of type QueryStatus() indicating + the status of the query. + context -- String indicating any additional context + about the query. For example, if there was + an error, this might indicate the type of + error that occurred. + Default of None. + + Return Value: + Nothing. + """ + + self.status = status + self.context = context + + return + + def __str__(self): + """Convert Object To String. + + Keyword Arguments: + self -- This object. + + Return Value: + Nicely formatted string to get information about this object. + """ + status = str(self.status) + if self.context is not None: + #There is extra context information available about the results. + #Append it to the normal response text. + status += f" ({self.context})" + + return status diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 03785d1..2378164 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -23,6 +23,8 @@ from colorama import Fore, Style, init from requests_futures.sessions import FuturesSession from torrequest import TorRequest +from result import QueryStatus +from result import QueryResult module_name = "Sherlock: Find Usernames Across Social Networks" __version__ = "0.10.0" @@ -206,7 +208,8 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, the following keys: url_main: URL of main site. url_user: URL of user on site (if account exists). - exists: String indicating results of test for account existence. + status: QueryResult() object indicating results of test for + account existence. 
http_status: HTTP status code of query which checked for existence on site. response_text: Text that came back from request. May be None if @@ -265,7 +268,7 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, if not print_found_only: print_invalid(social_network, "Illegal Username Format For This Site!", color) - results_site["exists"] = "illegal" + results_site['status'] = QueryResult(QueryStatus.ILLEGAL) results_site["url_user"] = "" results_site['http_status'] = "" results_site['response_text'] = "" @@ -332,18 +335,14 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, # Retrieve other site information again url = results_site.get("url_user") - exists = results_site.get("exists") - if exists is not None: + status = results_site.get("status") + if status is not None: # We have already determined the user doesn't exist here continue # Get the expected error type error_type = net_info["errorType"] - # Default data in case there are any failures in doing a request. - http_status = "?" - response_text = "" - # Retrieve future and ensure it has finished future = net_info["request_future"] r, error_text, expection_text = get_response(request_future=future, @@ -362,35 +361,35 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, try: http_status = r.status_code except: - pass + http_status = "?" 
try: response_text = r.text.encode(r.encoding) except: - pass + response_text = "" if error_text is not None: print_error(social_network, expection_text, error_text, "", verbose, color) - exists = "error" + result = QueryResult(QueryStatus.UNKNOWN, error_text) elif error_type == "message": error = net_info.get("errorMsg") # Checks if the error message is in the HTML if not error in r.text: print_found(social_network, url, response_time, verbose, color) - exists = "yes" + result = QueryResult(QueryStatus.CLAIMED) else: if not print_found_only: print_not_found(social_network, response_time, verbose, color) - exists = "no" + result = QueryResult(QueryStatus.AVAILABLE) elif error_type == "status_code": # Checks if the status code of the response is 2XX if not r.status_code >= 300 or r.status_code < 200: print_found(social_network, url, response_time, verbose, color) - exists = "yes" + result = QueryResult(QueryStatus.CLAIMED) else: if not print_found_only: print_not_found(social_network, response_time, verbose, color) - exists = "no" + result = QueryResult(QueryStatus.AVAILABLE) elif error_type == "response_url": # For this detection method, we have turned off the redirect. 
@@ -401,19 +400,14 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, if 200 <= r.status_code < 300: # print_found(social_network, url, response_time, verbose, color) - exists = "yes" + result = QueryResult(QueryStatus.CLAIMED) else: if not print_found_only: print_not_found(social_network, response_time, verbose, color) - exists = "no" + result = QueryResult(QueryStatus.AVAILABLE) - elif error_type == "": - if not print_found_only: - print_invalid(social_network, "Error!", color) - exists = "error" - - # Save exists flag - results_site['exists'] = exists + # Save status of request + results_site['status'] = result # Save results from request results_site['http_status'] = http_status @@ -642,7 +636,7 @@ def main(): exists_counter = 0 for website_name in results: dictionary = results[website_name] - if dictionary.get("exists") == "yes": + if dictionary.get("status").status == QueryStatus.CLAIMED: exists_counter += 1 file.write(dictionary["url_user"] + "\n") file.write(f"Total Websites Username Detected On : {exists_counter}") @@ -665,7 +659,7 @@ def main(): site, results[site]['url_main'], results[site]['url_user'], - results[site]['exists'], + str(results[site]['status'].status), results[site]['http_status'], results[site]['response_time_s'] ] diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index 0c55766..62e69c6 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -7,6 +7,8 @@ import os import os.path import unittest import sherlock +from result import QueryStatus +from result import QueryResult import warnings @@ -95,10 +97,10 @@ class SherlockBaseTest(unittest.TestCase): if exist_check: check_type_text = "exists" - exist_result_desired = "yes" + exist_result_desired = QueryStatus.CLAIMED else: check_type_text = "does not exist" - exist_result_desired = "no" + exist_result_desired = QueryStatus.AVAILABLE for username in username_list: results = sherlock.sherlock(username, @@ -112,7 +114,8 @@ class 
SherlockBaseTest(unittest.TestCase): with self.subTest(f"Checking Username '{username}' " f"{check_type_text} on Site '{site}'" ): - self.assertEqual(result['exists'], exist_result_desired) + self.assertEqual(result['status'].status, + exist_result_desired) return From 2a8f83924d624aee20ea47ad2deb9179fe51b68e Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Fri, 27 Dec 2019 11:01:34 -0600 Subject: [PATCH 14/36] Remove some unneeded imports. Add minor comment. --- sherlock/sherlock.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 2378164..e4990ec 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -13,9 +13,7 @@ import os import platform import re import sys -import random from argparse import ArgumentParser, RawDescriptionHelpFormatter -from concurrent.futures import ThreadPoolExecutor from time import monotonic import requests @@ -53,6 +51,7 @@ class SherlockFuturesSession(FuturesSession): Return Value: Request object. """ + #Record the start time for the request. start = monotonic() def response_time(resp, *args, **kwargs): From bbb44d7ef9bcf66779f86ec0cdbcec8842a52753 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Fri, 27 Dec 2019 11:12:34 -0600 Subject: [PATCH 15/36] Add defensive check for unknown Error Type. If it does happen, an exception will be thrown, instead of using the previous site's results. --- sherlock/sherlock.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index e4990ec..05dd8f9 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -207,7 +207,7 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, the following keys: url_main: URL of main site. url_user: URL of user on site (if account exists). 
- status: QueryResult() object indicating results of test for + status: QueryResult() object indicating results of test for account existence. http_status: HTTP status code of query which checked for existence on site. @@ -404,6 +404,10 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, if not print_found_only: print_not_found(social_network, response_time, verbose, color) result = QueryResult(QueryStatus.AVAILABLE) + else: + #It should be impossible to ever get here... + raise ValueError(f"Unknown Error Type '{error_type}' for " + f"site '{social_network}'") # Save status of request results_site['status'] = result From 647aea577c48231b222cf716d2674b91ed03adfe Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Fri, 27 Dec 2019 11:34:33 -0600 Subject: [PATCH 16/36] Factor out all print statements from portion of code that determines the query results. --- sherlock/sherlock.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 05dd8f9..3741886 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -367,29 +367,20 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, response_text = "" if error_text is not None: - print_error(social_network, expection_text, error_text, "", verbose, color) result = QueryResult(QueryStatus.UNKNOWN, error_text) elif error_type == "message": error = net_info.get("errorMsg") # Checks if the error message is in the HTML if not error in r.text: - print_found(social_network, url, response_time, verbose, color) result = QueryResult(QueryStatus.CLAIMED) else: - if not print_found_only: - print_not_found(social_network, response_time, verbose, color) result = QueryResult(QueryStatus.AVAILABLE) - elif error_type == "status_code": # Checks if the status code of the response is 2XX if not r.status_code >= 300 or r.status_code < 200: - print_found(social_network, url, 
response_time, verbose, color) result = QueryResult(QueryStatus.CLAIMED) else: - if not print_found_only: - print_not_found(social_network, response_time, verbose, color) result = QueryResult(QueryStatus.AVAILABLE) - elif error_type == "response_url": # For this detection method, we have turned off the redirect. # So, there is no need to check the response URL: it will always @@ -397,18 +388,31 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, # code indicates that the request was successful (i.e. no 404, or # forward to some odd redirect). if 200 <= r.status_code < 300: - # - print_found(social_network, url, response_time, verbose, color) result = QueryResult(QueryStatus.CLAIMED) else: - if not print_found_only: - print_not_found(social_network, response_time, verbose, color) result = QueryResult(QueryStatus.AVAILABLE) else: #It should be impossible to ever get here... raise ValueError(f"Unknown Error Type '{error_type}' for " f"site '{social_network}'") + + if result.status == QueryStatus.CLAIMED: + print_found(social_network, url, response_time, verbose, color) + elif result.status == QueryStatus.AVAILABLE: + if not print_found_only: + print_not_found(social_network, response_time, verbose, color) + elif result.status == QueryStatus.UNKNOWN: + print_error(social_network, expection_text, error_text, "", verbose, color) + elif result.status == QueryStatus.ILLEGAL: + if not print_found_only: + print_invalid(social_network, "Illegal Username Format For This Site!", color) + else: + #It should be impossible to ever get here... + raise ValueError(f"Unknown Query Status '{str(result.status)}' for " + f"site '{social_network}'") + + # Save status of request results_site['status'] = result From 7f87f5fcc4524e0e7f67ca25967ac05c5ce767d4 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sun, 29 Dec 2019 00:50:06 -0600 Subject: [PATCH 17/36] Add module to store information about the sites. 
This handles getting the information loaded from the JSON file. For now, use the new SitesInformation() object to calculate the original JSON dictionary: the rest of the code will be updated in the future. --- sherlock/sherlock.py | 46 +++------ sherlock/sites.py | 226 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 33 deletions(-) create mode 100644 sherlock/sites.py diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 3741886..b4f07d7 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -23,6 +23,7 @@ from requests_futures.sessions import FuturesSession from torrequest import TorRequest from result import QueryStatus from result import QueryResult +from sites import SitesInformation module_name = "Sherlock: Find Usernames Across Social Networks" __version__ = "0.10.0" @@ -499,7 +500,7 @@ def main(): help="Make requests over a proxy. e.g. socks5://127.0.0.1:1080" ) parser.add_argument("--json", "-j", metavar="JSON_FILE", - dest="json_file", default="resources/data.json", + dest="json_file", default=None, help="Load data from a JSON file or an online, valid, JSON file.") parser.add_argument("--timeout", action="store", metavar='TIMEOUT', @@ -549,41 +550,20 @@ def main(): print("You can only use --output with a single username") sys.exit(1) - response_json_online = None - site_data_all = None - # Try to load json from website. + #Create object with all information about sites we are aware of. try: - response_json_online = requests.get(url=args.json_file) - except requests.exceptions.MissingSchema: # In case the schema is wrong it's because it may not be a website - pass + sites = SitesInformation(args.json_file) + except Exception as error: + print(f"ERROR: {error}") + sys.exit(1) - # Check if the response is appropriate. - if response_json_online is not None and response_json_online.status_code == 200: - # Since we got data from a website, try to load json and exit if parsing fails. 
- try: - site_data_all = response_json_online.json() - except ValueError: - print("Invalid JSON from website!") - sys.exit(1) - pass - - data_file_path = os.path.join(os.path.dirname( - os.path.realpath(__file__)), args.json_file) - # This will be none if the request had a missing schema - if site_data_all is None: - # Check if the file exists otherwise exit. - if not os.path.exists(data_file_path): - print("JSON file doesn't exist.") - print( - "If this is not a file but a website, make sure you have appended http:// or https://.") - sys.exit(1) - else: - raw = open(data_file_path, "r", encoding="utf-8") - try: - site_data_all = json.load(raw) - except: - print("Invalid JSON loaded from file.") + #Create original dictionary from SitesInformation() object. + #Eventually, the rest of the code will be updated to use the new object + #directly, but this will glue the two pieces together. + site_data_all = {} + for site in sites: + site_data_all[site.name] = site.information if args.site_list is None: # Not desired to look at a sub-set of sites diff --git a/sherlock/sites.py b/sherlock/sites.py new file mode 100644 index 0000000..8bbe072 --- /dev/null +++ b/sherlock/sites.py @@ -0,0 +1,226 @@ +"""Sherlock Sites Information Module + +This module supports storing information about web sites. +This is the raw data that will be used to search for usernames. +""" +import logging +import os +import json +import requests + + +class SiteInformation(): + def __init__(self, name, url_home, url_username_format, + username_claimed, username_unclaimed, + information): + """Create Site Information Object. + + Contains information about a specific web site. + + Keyword Arguments: + self -- This object. + name -- String which identifies site. + url_home -- String containing URL for home of site. + url_username_format -- String containing URL for Username format + on site. + NOTE: The string should contain the + token "{}" where the username should + be substituted. 
For example, a string + of "https://somesite.com/users/{}" + indicates that the individual + usernames would show up under the + "https://somesite.com/users/" area of + the web site. + username_claimed -- String containing username which is known + to be claimed on web site. + username_unclaimed -- String containing username which is known + to be unclaimed on web site. + information -- Dictionary containing all known information + about web site. + NOTE: Custom information about how to + actually detect the existence of the + username will be included in this + dictionary. This information will + be needed by the detection method, + but it is only recorded in this + object for future use. + + Return Value: + Nothing. + """ + + self.name = name + self.url_home = url_home + self.url_username_format = url_username_format + self.username_claimed = username_claimed + self.username_unclaimed = username_unclaimed + self.information = information + + return + + def __str__(self): + """Convert Object To String. + + Keyword Arguments: + self -- This object. + + Return Value: + Nicely formatted string to get information about this object. + """ + + return f"{self.name} ({self.url_home})" + + +class SitesInformation(): + def __init__(self, data_file_path=None): + """Create Sites Information Object. + + Contains information about all supported web sites. + + Keyword Arguments: + self -- This object. + data_file_path -- String which indicates path to data file. + The file name must end in ".json". + + There are 3 possible formats: + * Absolute File Format + For example, "c:/stuff/data.json". + * Relative File Format + The current working directory is used + as the context. + For example, "data.json". + * URL Format + For example, + "https://example.com/data.json", or + "http://example.com/data.json". + + An exception will be thrown if the path + to the data file is not in the expected + format, or if there was any problem loading + the file. 
+ + If this option is not specified, then a + default site list will be used. + + Return Value: + Nothing. + """ + + if data_file_path is None: + #Use internal default. + data_file_path = \ + os.path.join(os.path.dirname(os.path.realpath(__file__)), + "resources/data.json" + ) + + #Ensure that specified data file has correct extension. + if ".json" != data_file_path[-5:].lower(): + raise FileNotFoundError(f"Incorrect JSON file extension for " + f"data file '{data_file_path}'." + ) + + if ( ("http://" == data_file_path[:7].lower()) or + ("https://" == data_file_path[:8].lower()) + ): + #Reference is to a URL. + try: + response = requests.get(url=data_file_path) + except Exception as error: + raise FileNotFoundError(f"Problem while attempting to access " + f"data file URL '{data_file_path}': " + f"{str(error)}" + ) + if response.status_code == 200: + try: + site_data = response.json() + except Exception as error: + raise ValueError(f"Problem parsing json contents at " + f"'{data_file_path}': {str(error)}." + ) + else: + raise FileNotFoundError(f"Bad response while accessing " + f"data file URL '{data_file_path}'." + ) + else: + #Reference is to a file. + try: + with open(data_file_path, "r", encoding="utf-8") as file: + try: + site_data = json.load(file) + except Exception as error: + raise ValueError(f"Problem parsing json contents at " + f"'{data_file_path}': {str(error)}." + ) + except FileNotFoundError as error: + raise FileNotFoundError(f"Problem while attempting to access " + f"data file '{data_file_path}'." + ) + + self.sites = {} + + #Add all of site information from the json file to internal site list. 
+ for site_name in site_data: + try: + self.sites[site_name] = \ + SiteInformation(site_name, + site_data[site_name]["urlMain"], + site_data[site_name]["url"], + site_data[site_name]["username_claimed"], + site_data[site_name]["username_unclaimed"], + site_data[site_name] + ) + except KeyError as error: + raise ValueError(f"Problem parsing json contents at " + f"'{data_file_path}': " + f"Missing attribute {str(error)}." + ) + + #Initialize state if anyone iterates over this object. + self.__iteration_index = 0 + + return + + def __iter__(self): + """Iterator For Object. + + Keyword Arguments: + self -- This object. + + Return Value: + Iterator for sites object. + """ + return self + + def __next__(self): + """Next Method For Object. + + Keyword Arguments: + self -- This object. + + Return Value: + Returns individual site from beginning of self.sites dictionary + to the end. + Raises StopIteration when all sites have been passed. + """ + + if self.__iteration_index >= len(self.sites): + #Finished with iteration. + self.__iteration_index = 0 + raise StopIteration + else: + #Retrieve the next site from the ordered dictionary. + site = self.sites[list(self.sites)[self.__iteration_index]] + self.__iteration_index += 1 + + return site + + def __len__(self): + """Length For Object. + + Keyword Arguments: + self -- This object. + + Return Value: + Length of sites object. + """ + return len(self.sites) From 2e195d44393fc7347e25c82f7e0f2144a18189d0 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Tue, 31 Dec 2019 11:19:15 -0600 Subject: [PATCH 18/36] Move all writing of output files to occur after query takes place. Use with statement for results file, as that is more graceful on errors. Use try block for result directory creation: this has a smaller window for a race condition. 
--- sherlock/sherlock.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index c3ac6a2..74e1b3e 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -599,17 +599,6 @@ def main(): for username in args.username: print() - if args.output: - file = open(args.output, "w", encoding="utf-8") - elif args.folderoutput: # In case we handle multiple usernames at a targetted folder. - # If the folder doesnt exist, create it first - if not os.path.isdir(args.folderoutput): - os.mkdir(args.folderoutput) - file = open(os.path.join(args.folderoutput, - username + ".txt"), "w", encoding="utf-8") - else: - file = open(username + ".txt", "w", encoding="utf-8") - results = sherlock(username, site_data, verbose=args.verbose, @@ -620,14 +609,28 @@ def main(): timeout=args.timeout, color=not args.no_color) - exists_counter = 0 - for website_name in results: - dictionary = results[website_name] - if dictionary.get("status").status == QueryStatus.CLAIMED: - exists_counter += 1 - file.write(dictionary["url_user"] + "\n") - file.write(f"Total Websites Username Detected On : {exists_counter}") - file.close() + if args.output: + result_file = args.output + elif args.folderoutput: + # The usernames results should be stored in a targeted folder. 
+ # If the folder doesnt exist, create it first + try: + os.mkdir(args.folderoutput) + except FileExistsError: + #directory already exists + pass + result_file = os.path.join(args.folderoutput, f"{username}.txt") + else: + result_file = f"{username}.txt" + + with open(result_file, "w", encoding="utf-8") as file: + exists_counter = 0 + for website_name in results: + dictionary = results[website_name] + if dictionary.get("status").status == QueryStatus.CLAIMED: + exists_counter += 1 + file.write(dictionary["url_user"] + "\n") + file.write(f"Total Websites Username Detected On : {exists_counter}") if args.csv == True: with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report: From f29cab49e4bf5379b612532ba94824abc3546800 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Tue, 31 Dec 2019 14:48:21 -0600 Subject: [PATCH 19/36] Add popularity rank to Site Information object. Add method to retrieve list of names of the sites (sorted by alphabetical or popularity rank). --- sherlock/sites.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/sherlock/sites.py b/sherlock/sites.py index 8bbe072..ad20a66 100644 --- a/sherlock/sites.py +++ b/sherlock/sites.py @@ -6,11 +6,13 @@ This is the raw data that will be used to search for usernames. import logging import os import json +import operator import requests +import sys class SiteInformation(): - def __init__(self, name, url_home, url_username_format, + def __init__(self, name, url_home, url_username_format, popularity_rank, username_claimed, username_unclaimed, information): """Create Site Information Object. @@ -31,6 +33,10 @@ class SiteInformation(): usernames would show up under the "https://somesite.com/users/" area of the web site. + popularity_rank -- Integer indicating popularity of site. + In general, smaller numbers mean more + popular ("0" or None means ranking + information not available). 
username_claimed -- String containing username which is known to be claimed on web site. username_unclaimed -- String containing username which is known @@ -52,6 +58,12 @@ class SiteInformation(): self.name = name self.url_home = url_home self.url_username_format = url_username_format + + if (popularity_rank is None) or (popularity_rank == 0): + #We do not know the popularity, so make site go to bottom of list. + popularity_rank = sys.maxsize + self.popularity_rank = popularity_rank + self.username_claimed = username_claimed self.username_unclaimed = username_unclaimed self.information = information @@ -161,10 +173,14 @@ class SitesInformation(): #Add all of site information from the json file to internal site list. for site_name in site_data: try: + #If popularity unknown, make site be at bottom of list. + popularity_rank = site_data[site_name].get("rank", sys.maxsize) + self.sites[site_name] = \ SiteInformation(site_name, site_data[site_name]["urlMain"], site_data[site_name]["url"], + popularity_rank, site_data[site_name]["username_claimed"], site_data[site_name]["username_unclaimed"], site_data[site_name] @@ -180,6 +196,35 @@ class SitesInformation(): return + def site_name_list(self, popularity_rank=False): + """Get Site Name List. + + Keyword Arguments: + self -- This object. + popularity_rank -- Boolean indicating if list should be sorted + by popularity rank. + Default value is False. + NOTE: List is sorted in ascending + alphabetical order is popularity rank + is not requested. + + Return Value: + List of strings containing names of sites. + """ + + if popularity_rank == True: + #Sort in ascending popularity rank order. + site_rank_name = \ + sorted([(site.popularity_rank,site.name) for site in self], + key=operator.itemgetter(0) + ) + site_names = [name for _,name in site_rank_name] + else: + #Sort in ascending alphabetical order. 
+ site_names = sorted([site.name for site in self], key=str.lower) + + return site_names + def __iter__(self): """Iterator For Object. From 8f6938ecb120d744aed44e3b3470ef88399f1eb5 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Tue, 31 Dec 2019 15:26:15 -0600 Subject: [PATCH 20/36] Add option to *not* print out results. Configure tests so there is no print output. This simplifies looking at the error output when the tests fail. --- sherlock/sherlock.py | 41 ++++++++++++++++++++++++----------------- sherlock/tests/base.py | 4 +++- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 74e1b3e..08e6e15 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -185,7 +185,8 @@ def get_response(request_future, error_type, social_network, verbose=False, colo def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, - proxy=None, print_found_only=False, timeout=None, color=True): + proxy=None, print_found_only=False, timeout=None, color=True, + print_output=True): """Run Sherlock Analysis. Checks for existence of username on various social media sites. @@ -198,9 +199,12 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, tor -- Boolean indicating whether to use a tor circuit for the requests. unique_tor -- Boolean indicating whether to use a new tor circuit for each request. proxy -- String indicating the proxy URL + print_found_only -- Boolean indicating whether to only print found sites. timeout -- Time in seconds to wait before timing out request. Default is no timeout. color -- Boolean indicating whether to color terminal output + print_output -- Boolean indicating whether the output should be + printed. Default is True. Return Value: Dictionary containing results from report.
Key of dictionary is the name @@ -215,7 +219,8 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, response_text: Text that came back from request. May be None if there was an HTTP error when checking for existence. """ - print_info("Checking username", username, color) + if print_output == True: + print_info("Checking username", username, color) # Create session based on request methodology if tor or unique_tor: @@ -265,7 +270,7 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, regex_check = net_info.get("regexCheck") if regex_check and re.search(regex_check, username) is None: # No need to do the check at the site: this user name is not allowed. - if not print_found_only: + if (print_output == True) and not print_found_only: print_invalid(social_network, "Illegal Username Format For This Site!", color) results_site['status'] = QueryResult(QueryStatus.ILLEGAL) @@ -398,20 +403,22 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, f"site '{social_network}'") - if result.status == QueryStatus.CLAIMED: - print_found(social_network, url, response_time, verbose, color) - elif result.status == QueryStatus.AVAILABLE: - if not print_found_only: - print_not_found(social_network, response_time, verbose, color) - elif result.status == QueryStatus.UNKNOWN: - print_error(social_network, expection_text, error_text, "", verbose, color) - elif result.status == QueryStatus.ILLEGAL: - if not print_found_only: - print_invalid(social_network, "Illegal Username Format For This Site!", color) - else: - #It should be impossible to ever get here... - raise ValueError(f"Unknown Query Status '{str(result.status)}' for " - f"site '{social_network}'") + if print_output == True: + #Output to the terminal is desired. 
+ if result.status == QueryStatus.CLAIMED: + print_found(social_network, url, response_time, verbose, color) + elif result.status == QueryStatus.AVAILABLE: + if not print_found_only: + print_not_found(social_network, response_time, verbose, color) + elif result.status == QueryStatus.UNKNOWN: + print_error(social_network, expection_text, error_text, "", verbose, color) + elif result.status == QueryStatus.ILLEGAL: + if not print_found_only: + print_invalid(social_network, "Illegal Username Format For This Site!", color) + else: + #It should be impossible to ever get here... + raise ValueError(f"Unknown Query Status '{str(result.status)}' for " + f"site '{social_network}'") # Save status of request diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index 62e69c6..553d01c 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -108,7 +108,9 @@ class SherlockBaseTest(unittest.TestCase): verbose=self.verbose, tor=self.tor, unique_tor=self.unique_tor, - timeout=self.timeout + timeout=self.timeout, + color=False, + print_output=False ) for site, result in results.items(): with self.subTest(f"Checking Username '{username}' " From f48a2980f5bad29aec1ccf13c2fc8ccfd517d4f3 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Tue, 31 Dec 2019 15:33:48 -0600 Subject: [PATCH 21/36] Use SitesInformation() object in tests. For now, use the new SitesInformation() object to calculate the original JSON dictionary: the rest of the code will be updated in the future. 
--- sherlock/tests/base.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index 553d01c..a749f53 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -9,6 +9,7 @@ import unittest import sherlock from result import QueryStatus from result import QueryResult +from sites import SitesInformation import warnings @@ -29,10 +30,16 @@ class SherlockBaseTest(unittest.TestCase): #TODO: Figure out how to fix the code so this is not needed. warnings.simplefilter("ignore", ResourceWarning) - # Load the data file with all site information. - data_file_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "resources/data.json") - with open(data_file_path, "r", encoding="utf-8") as raw: - self.site_data_all = json.load(raw) + #Create object with all information about sites we are aware of. + sites = SitesInformation() + + #Create original dictionary from SitesInformation() object. + #Eventually, the rest of the code will be updated to use the new object + #directly, but this will glue the two pieces together. + site_data_all = {} + for site in sites: + site_data_all[site.name] = site.information + self.site_data_all = site_data_all # Load excluded sites list, if any excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "tests/.excluded_sites") @@ -116,7 +123,7 @@ class SherlockBaseTest(unittest.TestCase): with self.subTest(f"Checking Username '{username}' " f"{check_type_text} on Site '{site}'" ): - self.assertEqual(result['status'].status, + self.assertEqual(result['status'].status, exist_result_desired) return From 1101af81320f0dde45cb9907b9e7f109cfcd6ffa Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Tue, 31 Dec 2019 15:51:40 -0600 Subject: [PATCH 22/36] Add @sdushantha suggestion in creating directory. 
--- sherlock/sherlock.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 08e6e15..13b9dcf 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -620,12 +620,8 @@ def main(): result_file = args.output elif args.folderoutput: # The usernames results should be stored in a targeted folder. - # If the folder doesnt exist, create it first - try: - os.mkdir(args.folderoutput) - except FileExistsError: - #directory already exists - pass + # If the folder doesn't exist, create it first + os.makedirs(args.folderoutput, exist_ok=True) result_file = os.path.join(args.folderoutput, f"{username}.txt") else: result_file = f"{username}.txt" From 2c9fb4f295f46ce83645dd8c1ae5c424dc4e430a Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Tue, 31 Dec 2019 21:18:49 -0600 Subject: [PATCH 23/36] Change SitesInformation() to use a generator when iterating thru the sites. This avoids the problem of the state (i.e. self.__iteration_index) getting corrupted if any of the methods of a given object needed to iterate for their own purposes while a caller was already iterating thru the same object. The code is also much simpler to follow. --- sherlock/sites.py | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/sherlock/sites.py b/sherlock/sites.py index ad20a66..d843613 100644 --- a/sherlock/sites.py +++ b/sherlock/sites.py @@ -191,9 +191,6 @@ class SitesInformation(): f"Missing attribute {str(error)}." ) - #Initialize state if anyone iterates over this object. - self.__iteration_index = 0 - return def site_name_list(self, popularity_rank=False): @@ -234,30 +231,9 @@ class SitesInformation(): Return Value: Iterator for sites object. """ - return self - def __next__(self): - """Next Method For Object. - - Keyword Arguments: - self -- This object. - - Return Value: - Returns individual site from beginning of self.sites dictionary - to the end. 
- Raises StopIteration when all sites have been passed. - """ - - if self.__iteration_index >= len(self.sites): - #Finished with iteration. - self.__iteration_index = 0 - raise StopIteration - else: - #Retrieve the next site from the ordered dictionary. - site = self.sites[list(self.sites)[self.__iteration_index]] - self.__iteration_index += 1 - - return site + for site_name in self.sites: + yield self.sites[site_name] def __len__(self): """Length For Object. From de0ccfebb75f903634b62d608758cfd29e66add5 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 2 Jan 2020 06:03:41 -0600 Subject: [PATCH 24/36] Add option to skip test if site returns error status (e.g. timeout connecting with site). This makes it easier to interpret the test results. --- sherlock/tests/base.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index a749f53..3b4b180 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -53,6 +53,7 @@ class SherlockBaseTest(unittest.TestCase): self.tor=False self.unique_tor=False self.timeout=None + self.skip_error_sites=True return @@ -123,6 +124,16 @@ class SherlockBaseTest(unittest.TestCase): with self.subTest(f"Checking Username '{username}' " f"{check_type_text} on Site '{site}'" ): + if ( + (self.skip_error_sites == True) and + (result['status'].status == QueryStatus.UNKNOWN) + ): + #Some error connecting to site. + self.skipTest(f"Skipping Username '{username}' " + f"{check_type_text} on Site '{site}': " + f"Site returned error status." + ) + self.assertEqual(result['status'].status, exist_result_desired) From 529b37bc60e83afd18138e5f4ac650511cbf2209 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sun, 12 Apr 2020 11:38:30 -0500 Subject: [PATCH 25/36] Change tests so that expected state is first and actual state follows (previous order was confusing). Also use "claimed" and "available" terms when reporting test results. 
--- sherlock/tests/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index 3b4b180..228445c 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -104,10 +104,10 @@ class SherlockBaseTest(unittest.TestCase): site_data = self.site_data_filter(site_list) if exist_check: - check_type_text = "exists" + check_type_text = "claimed" exist_result_desired = QueryStatus.CLAIMED else: - check_type_text = "does not exist" + check_type_text = "available" exist_result_desired = QueryStatus.AVAILABLE for username in username_list: @@ -134,8 +134,8 @@ class SherlockBaseTest(unittest.TestCase): f"Site returned error status." ) - self.assertEqual(result['status'].status, - exist_result_desired) + self.assertEqual(exist_result_desired, + result['status'].status) return From 5edc3c0a0bcb7b81ac303164c3c58f3e99d95e39 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sun, 12 Apr 2020 11:55:11 -0500 Subject: [PATCH 26/36] Fix tests. Instagram no longer uses the message detection. --- sherlock/tests/all.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sherlock/tests/all.py b/sherlock/tests/all.py index c5b99c6..4a9e535 100644 --- a/sherlock/tests/all.py +++ b/sherlock/tests/all.py @@ -21,7 +21,7 @@ class SherlockDetectTests(SherlockBaseTest): Will trigger an assert if detection mechanism did not work as expected. """ - site = 'Instagram' + site = 'Instructables' site_data = self.site_data_all[site] #Ensure that the site's detection method has not changed. @@ -48,7 +48,7 @@ class SherlockDetectTests(SherlockBaseTest): Will trigger an assert if detection mechanism did not work as expected. """ - site = 'Instagram' + site = 'Instructables' site_data = self.site_data_all[site] #Ensure that the site's detection method has not changed. From 6f3fc3d5480733a729174d3d675783a4b84edc52 Mon Sep 17 00:00:00 2001 From: "Christopher K. 
Hoadley" Date: Sun, 12 Apr 2020 11:56:36 -0500 Subject: [PATCH 27/36] Remove AdobeForums. All usernames are reported available. --- data_bad_site.json | 8 ++++++++ removed_sites.md | 25 ++++++++++++++++++++++++- sherlock/resources/data.json | 8 -------- 3 files changed, 32 insertions(+), 9 deletions(-) diff --git a/data_bad_site.json b/data_bad_site.json index 87664fa..90d76b4 100644 --- a/data_bad_site.json +++ b/data_bad_site.json @@ -1,4 +1,12 @@ { + "AdobeForums": { + "errorType": "status_code", + "rank": 59, + "url": "https://forums.adobe.com/people/{}", + "urlMain": "https://forums.adobe.com/", + "username_claimed": "jack", + "username_unclaimed": "noonewouldeverusethis77777" + }, "AngelList": { "errorType": "status_code", "rank": 5767, diff --git a/removed_sites.md b/removed_sites.md index db577de..30a218a 100644 --- a/removed_sites.md +++ b/removed_sites.md @@ -6,6 +6,29 @@ They are listed here in the hope that things may change in the future so they may be re-included. +## AdobeForums + +As of 2020-04-12, all usernames are reported as available. + +When I went to the site to see what was going on, usernames that I know +existed were redirecting to the main page. + +I was able to see user profiles without logging in, but the URL was not +related to their user name. For example, user "tomke" went to +https://community.adobe.com/t5/user/viewprofilepage/user-id/10882613. +This can be detected, but it requires a different detection method. + +``` + "AdobeForums": { + "errorType": "status_code", + "rank": 59, + "url": "https://forums.adobe.com/people/{}", + "urlMain": "https://forums.adobe.com/", + "username_claimed": "jack", + "username_unclaimed": "noonewouldeverusethis77777" + }, +``` + ## Basecamp As of 2020-02-23, all usernames are reported as not existing. @@ -423,7 +446,7 @@ exists or not. ``` -## InsaneJournal +## InsaneJournal As of 2020-02-23, InsaneJournal returns false positive, when providing a username which contains a period. 
Since we were not able to find the critera for a valid username, the best thing to do now is to remove it. diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index a9100c8..5a11186 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -66,14 +66,6 @@ "username_claimed": "blue", "username_unclaimed": "noonewouldeverusethis7" }, - "AdobeForums": { - "errorType": "status_code", - "rank": 59, - "url": "https://forums.adobe.com/people/{}", - "urlMain": "https://forums.adobe.com/", - "username_claimed": "jack", - "username_unclaimed": "noonewouldeverusethis77777" - }, "Alik.cz": { "errorType": "status_code", "rank": 624805, From 2a8f97b60912535306b1bdde32bf6eb1b22d44a5 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sun, 12 Apr 2020 11:59:37 -0500 Subject: [PATCH 28/36] Fix tests for metacritic. Unclaimed username was illegal. --- sherlock/resources/data.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sherlock/resources/data.json b/sherlock/resources/data.json index 5a11186..637bf44 100644 --- a/sherlock/resources/data.json +++ b/sherlock/resources/data.json @@ -2372,7 +2372,7 @@ "url": "https://www.metacritic.com/user/{}", "urlMain": "https://www.metacritic.com/", "username_claimed": "blue", - "username_unclaimed": "noneownsthisusername" + "username_unclaimed": "noonewould" }, "mixer.com": { "errorType": "status_code", From c05471292043ad9ea843ebeb06d3ce8cfa33bf70 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sat, 18 Apr 2020 22:04:09 -0500 Subject: [PATCH 29/36] Move response time for query into QueryResult() object. 
--- sherlock/result.py | 9 ++++++--- sherlock/sherlock.py | 29 +++++++++++++++++++---------- 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/sherlock/result.py b/sherlock/result.py index acbdabd..97acfa9 100644 --- a/sherlock/result.py +++ b/sherlock/result.py @@ -31,7 +31,7 @@ class QueryResult(): Describes result of query about a given username. """ - def __init__(self, status, context=None): + def __init__(self, status, query_time=None, context=None): """Create Query Result Object. Contains information about a specific method of detecting usernames on @@ -41,6 +41,8 @@ class QueryResult(): self -- This object. status -- Enumeration of type QueryStatus() indicating the status of the query. + query_time -- Time (in seconds) required to perform query. + Default of None. context -- String indicating any additional context about the query. For example, if there was an error, this might indicate the type of @@ -51,8 +53,9 @@ class QueryResult(): Nothing. """ - self.status = status - self.context = context + self.status = status + self.query_time = query_time + self.context = context return diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index a4a3f4b..bad6223 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -280,7 +280,6 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, results_site["url_user"] = "" results_site['http_status'] = "" results_site['response_text'] = "" - results_site['response_time_s'] = "" else: # URL of user on site (if it exists) url = net_info["url"].format(username) @@ -376,20 +375,26 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, response_text = "" if error_text is not None: - result = QueryResult(QueryStatus.UNKNOWN, error_text) + result = QueryResult(QueryStatus.UNKNOWN, + query_time=response_time, + context=error_text) elif error_type == "message": error = net_info.get("errorMsg") # Checks if the error message is in the HTML if not error in 
r.text: - result = QueryResult(QueryStatus.CLAIMED) + result = QueryResult(QueryStatus.CLAIMED, + query_time=response_time) else: - result = QueryResult(QueryStatus.AVAILABLE) + result = QueryResult(QueryStatus.AVAILABLE, + query_time=response_time) elif error_type == "status_code": # Checks if the status code of the response is 2XX if not r.status_code >= 300 or r.status_code < 200: - result = QueryResult(QueryStatus.CLAIMED) + result = QueryResult(QueryStatus.CLAIMED, + query_time=response_time) else: - result = QueryResult(QueryStatus.AVAILABLE) + result = QueryResult(QueryStatus.AVAILABLE, + query_time=response_time) elif error_type == "response_url": # For this detection method, we have turned off the redirect. # So, there is no need to check the response URL: it will always @@ -397,9 +402,11 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, # code indicates that the request was successful (i.e. no 404, or # forward to some odd redirect). if 200 <= r.status_code < 300: - result = QueryResult(QueryStatus.CLAIMED) + result = QueryResult(QueryStatus.CLAIMED, + query_time=response_time) else: - result = QueryResult(QueryStatus.AVAILABLE) + result = QueryResult(QueryStatus.AVAILABLE, + query_time=response_time) else: #It should be impossible to ever get here... raise ValueError(f"Unknown Error Type '{error_type}' for " @@ -430,7 +437,6 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, # Save results from request results_site['http_status'] = http_status results_site['response_text'] = response_text - results_site['response_time_s'] = response_time # Add this site's results into final dictionary with all of the other results. 
results_total[social_network] = results_site @@ -654,13 +660,16 @@ def main(): ] ) for site in results: + response_time_s = results[site]['status'].query_time + if response_time_s is None: + response_time_s = "" writer.writerow([username, site, results[site]['url_main'], results[site]['url_user'], str(results[site]['status'].status), results[site]['http_status'], - results[site]['response_time_s'] + response_time_s ] ) From c07d3967aacc7d99e016004474af99da47b5aaf6 Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Sun, 19 Apr 2020 15:38:16 -0500 Subject: [PATCH 30/36] Create QueryNotify() base class for notifying the caller about the results. --- sherlock/notify.py | 67 ++++++++++++++++++++++++++++++++++++++++++ sherlock/sherlock.py | 16 +++++++++- sherlock/tests/base.py | 5 ++++ 3 files changed, 87 insertions(+), 1 deletion(-) create mode 100644 sherlock/notify.py diff --git a/sherlock/notify.py b/sherlock/notify.py new file mode 100644 index 0000000..1046a68 --- /dev/null +++ b/sherlock/notify.py @@ -0,0 +1,67 @@ +"""Sherlock Notify Module + +This module defines the objects for notifying the caller about the +results of queries. +""" +from result import QueryStatus + + +class QueryNotify(): + """Query Notify Object. + + Base class that describes methods available to notify the results of + a query. + It is intended that other classes inherit from this base class and + override the methods to implement specific functionality. + """ + def __init__(self, result=None): + """Create Query Notify Object. + + Contains information about a specific method of notifying the results + of a query. + + Keyword Arguments: + self -- This object. + result -- Object of type QueryResult() containing + results for this query. + + Return Value: + Nothing. + """ + + self.result = result + + return + + def update(self, result): + """Notify Update. + + Notify method for query result. This method will typically be + overridden by higher level classes that will inherit from it. 
+ + Keyword Arguments: + self -- This object. + result -- Object of type QueryResult() containing + results for this query. + + Return Value: + Nothing. + """ + + self.result = result + + return + + def __str__(self): + """Convert Object To String. + + Keyword Arguments: + self -- This object. + + Return Value: + Nicely formatted string to get information about this object. + """ + result = str(self.result) + + return result + diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index bad6223..65ff62d 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -26,6 +26,7 @@ from requests_futures.sessions import FuturesSession from torrequest import TorRequest from result import QueryStatus from result import QueryResult +from notify import QueryNotify from sites import SitesInformation module_name = "Sherlock: Find Usernames Across Social Networks" @@ -187,7 +188,8 @@ def get_response(request_future, error_type, social_network, verbose=False, colo return response, error_context, expection_text -def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, +def sherlock(username, site_data, query_notify, verbose=False, + tor=False, unique_tor=False, proxy=None, print_found_only=False, timeout=None, color=True, print_output=True): """Run Sherlock Analysis. @@ -198,6 +200,9 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, username -- String indicating username that report should be created against. site_data -- Dictionary containing all of the site data. + query_notify -- Object with base type of QueryNotify(). + This will be used to notify the caller about + query results. verbose -- Boolean indicating whether to give verbose output. tor -- Boolean indicating whether to use a tor circuit for the requests. unique_tor -- Boolean indicating whether to use a new tor circuit for each request. 
@@ -280,6 +285,7 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, results_site["url_user"] = "" results_site['http_status'] = "" results_site['response_text'] = "" + query_notify.update(results_site['status']) else: # URL of user on site (if it exists) url = net_info["url"].format(username) @@ -413,6 +419,9 @@ def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False, f"site '{social_network}'") + #Notify caller about results of query. + query_notify.update(result) + if print_output == True: #Output to the terminal is desired. if result.status == QueryStatus.CLAIMED: @@ -614,12 +623,17 @@ def main(): for site in ranked_sites: site_data[site] = site_dataCpy.get(site) + + #Create notify object for query results. + query_notify = QueryNotify() + # Run report on all specified users. for username in args.username: print() results = sherlock(username, site_data, + query_notify, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor, diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index 228445c..b497e7f 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -9,6 +9,7 @@ import unittest import sherlock from result import QueryStatus from result import QueryResult +from notify import QueryNotify from sites import SitesInformation import warnings @@ -49,6 +50,9 @@ class SherlockBaseTest(unittest.TestCase): except FileNotFoundError: self.excluded_sites = [] + #Create notify object for query results. + self.query_notify = QueryNotify() + self.verbose=False self.tor=False self.unique_tor=False @@ -113,6 +117,7 @@ class SherlockBaseTest(unittest.TestCase): for username in username_list: results = sherlock.sherlock(username, site_data, + self.query_notify, verbose=self.verbose, tor=self.tor, unique_tor=self.unique_tor, From ae2fd7a729211c6484ee1c463183fe12d9ca79a0 Mon Sep 17 00:00:00 2001 From: "Christopher K. 
Hoadley" Date: Sun, 19 Apr 2020 15:41:59 -0500 Subject: [PATCH 31/36] Add username and site information to Query Result object. This will allow whoever defines a Query Notify object to have all of the context required to do their notifications. --- sherlock/result.py | 19 +++++++++++++++---- sherlock/sherlock.py | 44 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 13 deletions(-) diff --git a/sherlock/result.py b/sherlock/result.py index 97acfa9..3c33ba9 100644 --- a/sherlock/result.py +++ b/sherlock/result.py @@ -31,7 +31,8 @@ class QueryResult(): Describes result of query about a given username. """ - def __init__(self, status, query_time=None, context=None): + def __init__(self, username, site_name, site_url_user, status, + query_time=None, context=None): """Create Query Result Object. Contains information about a specific method of detecting usernames on @@ -39,6 +40,13 @@ class QueryResult(): Keyword Arguments: self -- This object. + username -- String indicating username that query result + was about. + site_name -- String which identifies site. + site_url_user -- String containing URL for username on site. + NOTE: The site may or may not exist: this + just indicates what the name would + be, if it existed. status -- Enumeration of type QueryStatus() indicating the status of the query. query_time -- Time (in seconds) required to perform query. @@ -53,9 +61,12 @@ class QueryResult(): Nothing. 
""" - self.status = status - self.query_time = query_time - self.context = context + self.username = username + self.site_name = site_name + self.site_url_user = site_url_user + self.status = status + self.query_time = query_time + self.context = context return diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 65ff62d..658c20d 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -274,6 +274,9 @@ def sherlock(username, site_data, query_notify, verbose=False, # Override/append any extra headers required by a given site. headers.update(net_info["headers"]) + # URL of user on site (if it exists) + url = net_info["url"].format(username) + # Don't make request if username is invalid for the site regex_check = net_info.get("regexCheck") if regex_check and re.search(regex_check, username) is None: @@ -281,14 +284,16 @@ def sherlock(username, site_data, query_notify, verbose=False, if (print_output == True) and not print_found_only: print_invalid(social_network, "Illegal Username Format For This Site!", color) - results_site['status'] = QueryResult(QueryStatus.ILLEGAL) + results_site['status'] = QueryResult(username, + social_network, + url, + QueryStatus.ILLEGAL) results_site["url_user"] = "" results_site['http_status'] = "" results_site['response_text'] = "" query_notify.update(results_site['status']) else: # URL of user on site (if it exists) - url = net_info["url"].format(username) results_site["url_user"] = url url_probe = net_info.get("urlProbe") if url_probe is None: @@ -381,25 +386,40 @@ def sherlock(username, site_data, query_notify, verbose=False, response_text = "" if error_text is not None: - result = QueryResult(QueryStatus.UNKNOWN, + result = QueryResult(username, + social_network, + url, + QueryStatus.UNKNOWN, query_time=response_time, context=error_text) elif error_type == "message": error = net_info.get("errorMsg") # Checks if the error message is in the HTML if not error in r.text: - result = QueryResult(QueryStatus.CLAIMED, + 
result = QueryResult(username, + social_network, + url, + QueryStatus.CLAIMED, query_time=response_time) else: - result = QueryResult(QueryStatus.AVAILABLE, + result = QueryResult(username, + social_network, + url, + QueryStatus.AVAILABLE, query_time=response_time) elif error_type == "status_code": # Checks if the status code of the response is 2XX if not r.status_code >= 300 or r.status_code < 200: - result = QueryResult(QueryStatus.CLAIMED, + result = QueryResult(username, + social_network, + url, + QueryStatus.CLAIMED, query_time=response_time) else: - result = QueryResult(QueryStatus.AVAILABLE, + result = QueryResult(username, + social_network, + url, + QueryStatus.AVAILABLE, query_time=response_time) elif error_type == "response_url": # For this detection method, we have turned off the redirect. @@ -408,10 +428,16 @@ def sherlock(username, site_data, query_notify, verbose=False, # code indicates that the request was successful (i.e. no 404, or # forward to some odd redirect). if 200 <= r.status_code < 300: - result = QueryResult(QueryStatus.CLAIMED, + result = QueryResult(username, + social_network, + url, + QueryStatus.CLAIMED, query_time=response_time) else: - result = QueryResult(QueryStatus.AVAILABLE, + result = QueryResult(username, + social_network, + url, + QueryStatus.AVAILABLE, query_time=response_time) else: #It should be impossible to ever get here... From 6caa5a4e35c0f0a346d6a4aef47c12571d67511a Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 23 Apr 2020 06:58:28 -0500 Subject: [PATCH 32/36] Restructure all print output to use QueryNotifyPrint() object. Added start and finish methods to base QueryNotify() object in order to get the same type of output. The only thing not supported in this refactor is the exception text for an error status when we are in verbose mode. This is an area of future work: I think exception information like this would more properly be handled by the logging module. 
--- sherlock/notify.py | 195 +++++++++++++++++++++++++++++++++++++++++ sherlock/sherlock.py | 119 ++++--------------------- sherlock/tests/base.py | 5 +- 3 files changed, 213 insertions(+), 106 deletions(-) diff --git a/sherlock/notify.py b/sherlock/notify.py index 1046a68..67dde84 100644 --- a/sherlock/notify.py +++ b/sherlock/notify.py @@ -4,6 +4,7 @@ This module defines the objects for notifying the caller about the results of queries. """ from result import QueryStatus +from colorama import Fore, Style, init class QueryNotify(): @@ -33,6 +34,25 @@ class QueryNotify(): return + def start(self, message=None): + """Notify Start. + + Notify method for start of query. This method will be called before + any queries are performed. This method will typically be + overridden by higher level classes that will inherit from it. + + Keyword Arguments: + self -- This object. + message -- Object that is used to give context to start + of query. + Default is None. + + Return Value: + Nothing. + """ + + return + def update(self, result): """Notify Update. @@ -52,6 +72,25 @@ class QueryNotify(): return + def finish(self, message=None): + """Notify Finish. + + Notify method for finish of query. This method will be called after + all queries have been performed. This method will typically be + overridden by higher level classes that will inherit from it. + + Keyword Arguments: + self -- This object. + message -- Object that is used to give context to start + of query. + Default is None. + + Return Value: + Nothing. + """ + + return + def __str__(self): """Convert Object To String. 
@@ -65,3 +104,159 @@ class QueryNotify(): return result + +def print_error(social_network, err, errstr, var, verbose=False, color=True): + if color: + print(Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {social_network}:" + + Fore.RED + f" {errstr}" + + Fore.YELLOW + f" {err if verbose else var}") + else: + print(f"[-] {social_network}: {errstr} {err if verbose else var}") + + +def format_response_time(response_time, verbose): + return f" [{round(response_time * 1000)} ms]" if verbose else "" + + +def print_found(social_network, url, response_time, verbose=False, color=True): + if color: + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.GREEN + "+" + + Fore.WHITE + "]" + + format_response_time(response_time, verbose) + + Fore.GREEN + f" {social_network}:"), url) + else: + print(f"[+]{format_response_time(response_time, verbose)} {social_network}: {url}") + +def print_not_found(social_network, response_time, verbose=False, color=True): + if color: + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + format_response_time(response_time, verbose) + + Fore.GREEN + f" {social_network}:" + + Fore.YELLOW + " Not Found!")) + else: + print(f"[-]{format_response_time(response_time, verbose)} {social_network}: Not Found!") + +def print_invalid(social_network, msg, color=True): + """Print invalid search result.""" + if color: + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {social_network}:" + + Fore.YELLOW + f" {msg}")) + else: + print(f"[-] {social_network} {msg}") + + +class QueryNotifyPrint(QueryNotify): + """Query Notify Print Object. + + Query notify class that prints results. + """ + def __init__(self, result=None, verbose=False, print_found_only=False, + color=True): + """Create Query Notify Print Object. + + Contains information about a specific method of notifying the results + of a query. + + Keyword Arguments: + self -- This object. 
+ result -- Object of type QueryResult() containing + results for this query. + verbose -- Boolean indicating whether to give verbose output. + print_found_only -- Boolean indicating whether to only print found sites. + color -- Boolean indicating whether to color terminal output + + Return Value: + Nothing. + """ + + # Colorama module's initialization. + init(autoreset=True) + + super().__init__(result) + self.verbose = verbose + self.print_found_only = print_found_only + self.color = color + + return + + def start(self, message): + """Notify Start. + + Will print the title to the standard output. + + Keyword Arguments: + self -- This object. + message -- String containing username that the series + of queries are about. + + Return Value: + Nothing. + """ + + title = "Checking username" + if self.color: + print(Style.BRIGHT + Fore.GREEN + "[" + + Fore.YELLOW + "*" + + Fore.GREEN + f"] {title}" + + Fore.WHITE + f" {message}" + + Fore.GREEN + " on:") + else: + print(f"[*] {title} {message} on:") + + return + + def update(self, result): + """Notify Update. + + Will print the query result to the standard output. + + Keyword Arguments: + self -- This object. + result -- Object of type QueryResult() containing + results for this query. + + Return Value: + Nothing. + """ + + self.result = result + + #Output to the terminal is desired. 
+ if result.status == QueryStatus.CLAIMED: + print_found(self.result.site_name, self.result.site_url_user, self.result.query_time, self.verbose, self.color) + elif result.status == QueryStatus.AVAILABLE: + if not self.print_found_only: + print_not_found(self.result.site_name, self.result.query_time, self.verbose, self.color) + elif result.status == QueryStatus.UNKNOWN: + print_error(self.result.site_name, "Exception Text", self.result.context, "", self.verbose, self.color) + elif result.status == QueryStatus.ILLEGAL: + if self.print_found_only == False: + print_invalid(self.result.site_name, "Illegal Username Format For This Site!", self.color) + else: + #It should be impossible to ever get here... + raise ValueError(f"Unknown Query Status '{str(result.status)}' for " + f"site '{self.result.site_name}'") + + return + + def __str__(self): + """Convert Object To String. + + Keyword Arguments: + self -- This object. + + Return Value: + Nicely formatted string to get information about this object. 
+ """ + result = str(self.result) + + return result diff --git a/sherlock/sherlock.py b/sherlock/sherlock.py index 658c20d..c4f9e60 100644 --- a/sherlock/sherlock.py +++ b/sherlock/sherlock.py @@ -20,13 +20,13 @@ from time import time import webbrowser import requests -from colorama import Fore, Style, init from requests_futures.sessions import FuturesSession from torrequest import TorRequest from result import QueryStatus from result import QueryResult from notify import QueryNotify +from notify import QueryNotifyPrint from sites import SitesInformation module_name = "Sherlock: Find Usernames Across Social Networks" @@ -98,66 +98,7 @@ class SherlockFuturesSession(FuturesSession): *args, **kwargs) -def print_info(title, info, color=True): - if color: - print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + f"] {title}" + - Fore.WHITE + f" {info}" + - Fore.GREEN + " on:") - else: - print(f"[*] {title} {info} on:") - -def print_error(social_network, err, errstr, var, verbose=False, color=True): - if color: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {social_network}:" + - Fore.RED + f" {errstr}" + - Fore.YELLOW + f" {err if verbose else var}") - else: - print(f"[-] {social_network}: {errstr} {err if verbose else var}") - - -def format_response_time(response_time, verbose): - return f" [{round(response_time * 1000)} ms]" if verbose else "" - - -def print_found(social_network, url, response_time, verbose=False, color=True): - if color: - print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.GREEN + "+" + - Fore.WHITE + "]" + - format_response_time(response_time, verbose) + - Fore.GREEN + f" {social_network}:"), url) - else: - print(f"[+]{format_response_time(response_time, verbose)} {social_network}: {url}") - -def print_not_found(social_network, response_time, verbose=False, color=True): - if color: - print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - 
format_response_time(response_time, verbose) + - Fore.GREEN + f" {social_network}:" + - Fore.YELLOW + " Not Found!")) - else: - print(f"[-]{format_response_time(response_time, verbose)} {social_network}: Not Found!") - -def print_invalid(social_network, msg, color=True): - """Print invalid search result.""" - if color: - print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {social_network}:" + - Fore.YELLOW + f" {msg}")) - else: - print(f"[-] {social_network} {msg}") - - -def get_response(request_future, error_type, social_network, verbose=False, color=True): +def get_response(request_future, error_type, social_network): #Default for Response object if some failure occurs. response = None @@ -188,10 +129,9 @@ def get_response(request_future, error_type, social_network, verbose=False, colo return response, error_context, expection_text -def sherlock(username, site_data, query_notify, verbose=False, +def sherlock(username, site_data, query_notify, tor=False, unique_tor=False, - proxy=None, print_found_only=False, timeout=None, color=True, - print_output=True): + proxy=None, timeout=None): """Run Sherlock Analysis. Checks for existence of username on various social media sites. @@ -203,16 +143,11 @@ def sherlock(username, site_data, query_notify, verbose=False, query_notify -- Object with base type of QueryNotify(). This will be used to notify the caller about query results. - verbose -- Boolean indicating whether to give verbose output. tor -- Boolean indicating whether to use a tor circuit for the requests. unique_tor -- Boolean indicating whether to use a new tor circuit for each request. proxy -- String indicating the proxy URL - print_found_only -- Boolean indicating whether to only print found sites. timeout -- Time in seconds to wait before timing out request. Default is no timeout. 
- color -- Boolean indicating whether to color terminal output - print_output -- Boolean indicating whether the output should be - printed. Default is True. Return Value: Dictionary containing results from report. Key of dictionary is the name @@ -227,8 +162,9 @@ def sherlock(username, site_data, query_notify, verbose=False, response_text: Text that came back from request. May be None if there was an HTTP error when checking for existence. """ - if print_output == True: - print_info("Checking username", username, color) + + #Notify caller that we are starting the query. + query_notify.start(username) # Create session based on request methodology if tor or unique_tor: @@ -281,9 +217,6 @@ def sherlock(username, site_data, query_notify, verbose=False, regex_check = net_info.get("regexCheck") if regex_check and re.search(regex_check, username) is None: # No need to do the check at the site: this user name is not allowed. - if (print_output == True) and not print_found_only: - print_invalid(social_network, "Illegal Username Format For This Site!", color) - results_site['status'] = QueryResult(username, social_network, url, @@ -365,9 +298,7 @@ def sherlock(username, site_data, query_notify, verbose=False, future = net_info["request_future"] r, error_text, expection_text = get_response(request_future=future, error_type=error_type, - social_network=social_network, - verbose=verbose, - color=color) + social_network=social_network) #Get response time for response of our request. try: @@ -448,24 +379,6 @@ def sherlock(username, site_data, query_notify, verbose=False, #Notify caller about results of query. query_notify.update(result) - if print_output == True: - #Output to the terminal is desired. 
- if result.status == QueryStatus.CLAIMED: - print_found(social_network, url, response_time, verbose, color) - elif result.status == QueryStatus.AVAILABLE: - if not print_found_only: - print_not_found(social_network, response_time, verbose, color) - elif result.status == QueryStatus.UNKNOWN: - print_error(social_network, expection_text, error_text, "", verbose, color) - elif result.status == QueryStatus.ILLEGAL: - if not print_found_only: - print_invalid(social_network, "Illegal Username Format For This Site!", color) - else: - #It should be impossible to ever get here... - raise ValueError(f"Unknown Query Status '{str(result.status)}' for " - f"site '{social_network}'") - - # Save status of request results_site['status'] = result @@ -475,6 +388,10 @@ def sherlock(username, site_data, query_notify, verbose=False, # Add this site's results into final dictionary with all of the other results. results_total[social_network] = results_site + + #Notify caller that all queries are finished. + query_notify.finish() + return results_total @@ -504,8 +421,6 @@ def timeout_check(value): def main(): - # Colorama module's initialization. - init(autoreset=True) version_string = f"%(prog)s {__version__}\n" + \ f"{requests.__description__}: {requests.__version__}\n" + \ @@ -651,7 +566,10 @@ def main(): #Create notify object for query results. - query_notify = QueryNotify() + query_notify = QueryNotifyPrint(result=None, + verbose=args.verbose, + print_found_only=args.print_found_only, + color=not args.no_color) # Run report on all specified users. 
for username in args.username: @@ -660,13 +578,10 @@ def main(): results = sherlock(username, site_data, query_notify, - verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor, proxy=args.proxy, - print_found_only=args.print_found_only, - timeout=args.timeout, - color=not args.no_color) + timeout=args.timeout) if args.output: result_file = args.output diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index b497e7f..999be46 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -118,12 +118,9 @@ class SherlockBaseTest(unittest.TestCase): results = sherlock.sherlock(username, site_data, self.query_notify, - verbose=self.verbose, tor=self.tor, unique_tor=self.unique_tor, - timeout=self.timeout, - color=False, - print_output=False + timeout=self.timeout ) for site, result in results.items(): with self.subTest(f"Checking Username '{username}' " From 1eaf0dc46e6f2d4b6af1f420974d1cc8f896db6a Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 23 Apr 2020 07:05:12 -0500 Subject: [PATCH 33/36] Update help command line. --- README.md | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index eb26a81..d96194f 100644 --- a/README.md +++ b/README.md @@ -60,13 +60,13 @@ $ python3 -m pip install -r requirements.txt ## Usage ```bash -$ python3 -m sherlock --help -usage: sherlock.py [-h] [--version] [--verbose] [--rank] - [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--tor] - [--unique-tor] [--csv] [--site SITE_NAME] - [--proxy PROXY_URL] [--json JSON_FILE] [--timeout TIMEOUT] - [--print-found] [--no-color] [--browse] - USERNAMES [USERNAMES ...] +$ python3 sherlock --help +usage: sherlock [-h] [--version] [--verbose] [--rank] + [--folderoutput FOLDEROUTPUT] [--output OUTPUT] [--tor] + [--unique-tor] [--csv] [--site SITE_NAME] [--proxy PROXY_URL] + [--json JSON_FILE] [--timeout TIMEOUT] [--print-found] + [--no-color] [--browse] + USERNAMES [USERNAMES ...] 
Sherlock: Find Usernames Across Social Networks (Version 0.11.0) @@ -112,12 +112,12 @@ optional arguments: To search for only one user: ``` -python3 sherlock.py user123 +python3 sherlock user123 ``` To search for more than one user: ``` -python3 sherlock.py user1 user2 user3 +python3 sherlock user1 user2 user3 ``` Accounts found will be stored in an individual text file with the corresponding username (e.g ```user123.txt```). @@ -186,6 +186,7 @@ Sherlock. This invocation hides the progress text that Sherlock normally outputs, and instead shows the verbose output of the tests. ``` +$ cd sherlock $ python3 -m unittest tests.all --buffer --verbose ``` @@ -193,7 +194,7 @@ Note that we do currently have 100% test coverage. Unfortunately, some of the sites that Sherlock checks are not always reliable, so it is common to get response errors. -If some sites are failing due to conection problems (site is down, in maintainence, etc) +If some sites are failing due to connection problems (site is down, in maintenance, etc) you can exclude them from tests by creating a `tests/.excluded_sites` file with a list of sites to ignore (one site name per line). From 1d251085b70bc52c73d2b7666670d2908ea7e1bf Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 23 Apr 2020 07:09:31 -0500 Subject: [PATCH 34/36] Remove verbose option from tests. It is not used anymore. --- sherlock/tests/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sherlock/tests/base.py b/sherlock/tests/base.py index 999be46..b0a9be8 100644 --- a/sherlock/tests/base.py +++ b/sherlock/tests/base.py @@ -53,7 +53,6 @@ class SherlockBaseTest(unittest.TestCase): #Create notify object for query results. self.query_notify = QueryNotify() - self.verbose=False self.tor=False self.unique_tor=False self.timeout=None From f26ea67e83c39e9d730992d9eafc252d7f464863 Mon Sep 17 00:00:00 2001 From: "Christopher K. 
Hoadley" Date: Thu, 23 Apr 2020 21:18:08 -0500 Subject: [PATCH 35/36] Move loose print functions into QueryNotifyPrint() class. --- sherlock/notify.py | 90 +++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 53 deletions(-) diff --git a/sherlock/notify.py b/sherlock/notify.py index 67dde84..404c276 100644 --- a/sherlock/notify.py +++ b/sherlock/notify.py @@ -105,55 +105,6 @@ class QueryNotify(): return result -def print_error(social_network, err, errstr, var, verbose=False, color=True): - if color: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {social_network}:" + - Fore.RED + f" {errstr}" + - Fore.YELLOW + f" {err if verbose else var}") - else: - print(f"[-] {social_network}: {errstr} {err if verbose else var}") - - -def format_response_time(response_time, verbose): - return f" [{round(response_time * 1000)} ms]" if verbose else "" - - -def print_found(social_network, url, response_time, verbose=False, color=True): - if color: - print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.GREEN + "+" + - Fore.WHITE + "]" + - format_response_time(response_time, verbose) + - Fore.GREEN + f" {social_network}:"), url) - else: - print(f"[+]{format_response_time(response_time, verbose)} {social_network}: {url}") - -def print_not_found(social_network, response_time, verbose=False, color=True): - if color: - print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - format_response_time(response_time, verbose) + - Fore.GREEN + f" {social_network}:" + - Fore.YELLOW + " Not Found!")) - else: - print(f"[-]{format_response_time(response_time, verbose)} {social_network}: Not Found!") - -def print_invalid(social_network, msg, color=True): - """Print invalid search result.""" - if color: - print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {social_network}:" + - Fore.YELLOW + f" {msg}")) - else: - print(f"[-] {social_network} {msg}") - - 
class QueryNotifyPrint(QueryNotify): """Query Notify Print Object. @@ -227,20 +178,53 @@ class QueryNotifyPrint(QueryNotify): Return Value: Nothing. """ + def format_response_time(response_time, verbose): + return f" [{round(response_time * 1000)} ms]" if verbose else "" self.result = result #Output to the terminal is desired. if result.status == QueryStatus.CLAIMED: - print_found(self.result.site_name, self.result.site_url_user, self.result.query_time, self.verbose, self.color) + if self.color: + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.GREEN + "+" + + Fore.WHITE + "]" + + format_response_time(self.result.query_time, self.verbose) + + Fore.GREEN + f" {self.result.site_name}:"), self.result.site_url_user) + else: + print(f"[+]{format_response_time(self.result.query_time, self.verbose)} {self.result.site_name}: {self.result.site_url_user}") elif result.status == QueryStatus.AVAILABLE: if not self.print_found_only: - print_not_found(self.result.site_name, self.result.query_time, self.verbose, self.color) + if self.color: + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + format_response_time(self.result.query_time, self.verbose) + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.YELLOW + " Not Found!")) + else: + print(f"[-]{format_response_time(self.result.query_time, self.verbose)} {self.result.site_name}: Not Found!") elif result.status == QueryStatus.UNKNOWN: - print_error(self.result.site_name, "Exception Text", self.result.context, "", self.verbose, self.color) + if self.color: + print(Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.RED + f" {self.result.context}" + + Fore.YELLOW + f" ") + else: + print(f"[-] {self.result.site_name}: {self.result.context} ") elif result.status == QueryStatus.ILLEGAL: if self.print_found_only == False: - print_invalid(self.result.site_name, "Illegal Username Format For This Site!", self.color) + msg = 
"Illegal Username Format For This Site!" + if self.color: + print((Style.BRIGHT + Fore.WHITE + "[" + + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.YELLOW + f" {msg}")) + else: + print(f"[-] {self.result.site_name} {msg}") else: #It should be impossible to ever get here... raise ValueError(f"Unknown Query Status '{str(result.status)}' for " From faac3ff0f3c98981ca792086c39c91b6acce8d3d Mon Sep 17 00:00:00 2001 From: "Christopher K. Hoadley" Date: Thu, 23 Apr 2020 21:33:32 -0500 Subject: [PATCH 36/36] Get rid of function that formats time. --- sherlock/notify.py | 49 ++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/sherlock/notify.py b/sherlock/notify.py index 404c276..0e2b160 100644 --- a/sherlock/notify.py +++ b/sherlock/notify.py @@ -178,40 +178,43 @@ class QueryNotifyPrint(QueryNotify): Return Value: Nothing. """ - def format_response_time(response_time, verbose): - return f" [{round(response_time * 1000)} ms]" if verbose else "" - self.result = result + if self.verbose == False or self.result.query_time is None: + response_time_text = "" + else: + response_time_text = f" [{round(self.result.query_time * 1000)} ms]" + #Output to the terminal is desired. 
if result.status == QueryStatus.CLAIMED: if self.color: print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.GREEN + "+" + - Fore.WHITE + "]" + - format_response_time(self.result.query_time, self.verbose) + - Fore.GREEN + f" {self.result.site_name}:"), self.result.site_url_user) + Fore.GREEN + "+" + + Fore.WHITE + "]" + + response_time_text + + Fore.GREEN + + f" {self.result.site_name}: {self.result.site_url_user}")) else: - print(f"[+]{format_response_time(self.result.query_time, self.verbose)} {self.result.site_name}: {self.result.site_url_user}") + print(f"[+]{response_time_text} {self.result.site_name}: {self.result.site_url_user}") elif result.status == QueryStatus.AVAILABLE: if not self.print_found_only: if self.color: print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - format_response_time(self.result.query_time, self.verbose) + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + " Not Found!")) + Fore.RED + "-" + + Fore.WHITE + "]" + + response_time_text + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.YELLOW + " Not Found!")) else: - print(f"[-]{format_response_time(self.result.query_time, self.verbose)} {self.result.site_name}: Not Found!") + print(f"[-]{response_time_text} {self.result.site_name}: Not Found!") elif result.status == QueryStatus.UNKNOWN: if self.color: print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + f" {self.result.context}" + - Fore.YELLOW + f" ") + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.RED + f" {self.result.context}" + + Fore.YELLOW + f" ") else: print(f"[-] {self.result.site_name}: {self.result.context} ") elif result.status == QueryStatus.ILLEGAL: @@ -219,10 +222,10 @@ class QueryNotifyPrint(QueryNotify): msg = "Illegal Username Format For This Site!" 
if self.color: print((Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + f" {msg}")) + Fore.RED + "-" + + Fore.WHITE + "]" + + Fore.GREEN + f" {self.result.site_name}:" + + Fore.YELLOW + f" {msg}")) else: print(f"[-] {self.result.site_name} {msg}") else: