sherlock/sherlock.py


#! /usr/bin/env python3
"""
Sherlock: Find Usernames Across Social Networks Module
This module contains the main logic to search for usernames at social
networks.
"""
import csv
import json
import os
import sys
import platform
import re
from argparse import ArgumentParser, RawDescriptionHelpFormatter
from concurrent.futures import ThreadPoolExecutor
import requests
from colorama import Back, Fore, Style, init
from requests_futures.sessions import FuturesSession
from torrequest import TorRequest
module_name = "Sherlock: Find Usernames Across Social Networks"
__version__ = "0.2.2"
# Global counter of accounts found across all checked sites
amount = 0
# TODO: fix tumblr
def open_file(fname):
    """Open the results file for appending."""
    return open(fname, "a")
def write_to_file(url, f):
    """Append a discovered profile URL to the results file."""
    f.write(url + "\n")


def final_score(amount, f):
    """Append the total number of accounts found to the results file."""
    f.write("Total: " + str(amount) + "\n")
def print_error(err, errstr, var, debug=False):
print(Style.BRIGHT + Fore.WHITE + "[" +
Fore.RED + "-" +
Fore.WHITE + "]" +
Fore.RED + f" {errstr}" +
Fore.YELLOW + f" {err if debug else var}")
def get_response(request_future, error_type, social_network, verbose=False):
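    """Resolve a request future, printing any request exception that occurs.

    Returns a (response, error_type) tuple; on failure the response is None
    and the error type is an empty string.
    """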
try:
rsp = request_future.result()
if rsp.status_code:
return rsp, error_type
except requests.exceptions.HTTPError as errh:
print_error(errh, "HTTP Error:", social_network, verbose)
except requests.exceptions.ConnectionError as errc:
print_error(errc, "Error Connecting:", social_network, verbose)
except requests.exceptions.Timeout as errt:
print_error(errt, "Timeout Error:", social_network, verbose)
except requests.exceptions.RequestException as err:
print_error(err, "Unknown error:", social_network, verbose)
return None, ""
def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False):
"""Run Sherlock Analysis.
Checks for existence of username on various social media sites.
Keyword Arguments:
username -- String indicating username that report
should be created against.
site_data -- Dictionary containing all of the site data.
verbose -- Boolean indicating whether to give verbose output.
tor -- Boolean indicating whether to use a tor circuit for the requests.
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
Return Value:
Dictionary containing results from report. Key of dictionary is the name
of the social network site, and the value is another dictionary with
the following keys:
url_main: URL of main site.
url_user: URL of user on site (if account exists).
exists: String indicating results of test for account existence.
http_status: HTTP status code of query which checked for existence on
site.
response_text: Text that came back from request. May be None if
there was an HTTP error when checking for existence.
"""
global amount
fname = username.lower() + ".txt"
if os.path.isfile(fname):
os.remove(fname)
print((Style.BRIGHT + Fore.GREEN + "[" +
Fore.YELLOW + "*" +
Fore.GREEN + "] Removing previous file:" +
Fore.WHITE + " {}").format(fname))
print((Style.BRIGHT + Fore.GREEN + "[" +
Fore.YELLOW + "*" +
Fore.GREEN + "] Checking username" +
Fore.WHITE + " {}" +
Fore.GREEN + " on:").format(username))
    # A browser User-Agent is needed: some sites return incorrect
    # information when they think the client is a bot.
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0'
}
# Allow 1 thread for each external service, so `len(site_data)` threads total
executor = ThreadPoolExecutor(max_workers=len(site_data))
# Create session based on request methodology
underlying_session = requests.session()
underlying_request = requests.Request()
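    # Plain placeholder request object; replaced by a TorRequest below when
    # requests are routed through Tor.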
if tor or unique_tor:
underlying_request = TorRequest()
underlying_session = underlying_request.session()
# Create multi-threaded session for all requests
session = FuturesSession(executor=executor, session=underlying_session)
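    # Requests issued on this session return futures immediately, so the
    # per-site checks below are dispatched concurrently on the executor.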
# Results from analysis of all sites
results_total = {}
# First create futures for all requests. This allows for the requests to run in parallel
for social_network, net_info in site_data.items():
# Results from analysis of this specific site
results_site = {}
# Record URL of main site
results_site['url_main'] = net_info.get("urlMain")
# Don't make request if username is invalid for the site
        regex_check = net_info.get("regexCheck")
        if regex_check and re.search(regex_check, username) is None:
            # Skip the request: this username is not valid for the site.
print((Style.BRIGHT + Fore.WHITE + "[" +
Fore.RED + "-" +
Fore.WHITE + "]" +
Fore.GREEN + " {}:" +
Fore.YELLOW + " Illegal Username Format For This Site!").format(social_network))
results_site["exists"] = "illegal"
else:
# URL of user on site (if it exists)
url = net_info["url"].format(username)
results_site["url_user"] = url
request_method = session.get
if social_network != "GitHub":
# If only the status_code is needed don't download the body
if net_info["errorType"] == 'status_code':
request_method = session.head
# This future starts running the request in a new thread, doesn't block the main thread
future = request_method(url=url, headers=headers)
# Store future in data for access later
net_info["request_future"] = future
            # Reset identity for tor (if needed)
if unique_tor:
underlying_request.reset_identity()
# Add this site's results into final dictionary with all of the other results.
results_total[social_network] = results_site
# Open the file containing account links
f = open_file(fname)
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
for social_network, net_info in site_data.items():
# Retrieve results again
results_site = results_total.get(social_network)
# Retrieve other site information again
url = results_site.get("url_user")
exists = results_site.get("exists")
if exists is not None:
# We have already determined the user doesn't exist here
continue
# Get the expected error type
error_type = net_info["errorType"]
# Default data in case there are any failures in doing a request.
http_status = "?"
response_text = ""
# Retrieve future and ensure it has finished
future = net_info["request_future"]
r, error_type = get_response(request_future=future,
error_type=error_type,
social_network=social_network,
verbose=verbose)
        # Attempt to get request information; the response may be None if the
        # request failed entirely.
        try:
            http_status = r.status_code
        except Exception:
            pass
        try:
            response_text = r.text.encode(r.encoding)
        except Exception:
            pass
2018-12-25 18:14:39 +00:00
        if error_type == "message":
            error = net_info.get("errorMsg")
            # The account exists if the error message is absent from the HTML
            if error not in r.text:
                print((Style.BRIGHT + Fore.WHITE + "[" +
                       Fore.GREEN + "+" +
                       Fore.WHITE + "]" +
                       Fore.GREEN + " {}:").format(social_network), url)
                write_to_file(url, f)
                exists = "yes"
                amount += 1
else:
print((Style.BRIGHT + Fore.WHITE + "[" +
Fore.RED + "-" +
Fore.WHITE + "]" +
Fore.GREEN + " {}:" +
Fore.YELLOW + " Not Found!").format(social_network))
exists = "no"
        elif error_type == "status_code":
            # The account exists if the response status code is 2XX
            if 200 <= r.status_code < 300:
                print((Style.BRIGHT + Fore.WHITE + "[" +
                       Fore.GREEN + "+" +
                       Fore.WHITE + "]" +
                       Fore.GREEN + " {}:").format(social_network), url)
                write_to_file(url, f)
                exists = "yes"
                amount += 1
else:
print((Style.BRIGHT + Fore.WHITE + "[" +
Fore.RED + "-" +
Fore.WHITE + "]" +
Fore.GREEN + " {}:" +
Fore.YELLOW + " Not Found!").format(social_network))
exists = "no"
        elif error_type == "response_url":
            error = net_info.get("errorUrl")
            # The account exists if we were not redirected to the error URL
            # defined in data.json
            if error not in r.url:
                print((Style.BRIGHT + Fore.WHITE + "[" +
                       Fore.GREEN + "+" +
                       Fore.WHITE + "]" +
                       Fore.GREEN + " {}:").format(social_network), url)
                write_to_file(url, f)
                exists = "yes"
                amount += 1
else:
print((Style.BRIGHT + Fore.WHITE + "[" +
Fore.RED + "-" +
Fore.WHITE + "]" +
Fore.GREEN + " {}:" +
Fore.YELLOW + " Not Found!").format(social_network))
exists = "no"
elif error_type == "":
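            # get_response cleared the error type, so the request itself failed.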
print((Style.BRIGHT + Fore.WHITE + "[" +
Fore.RED + "-" +
Fore.WHITE + "]" +
Fore.GREEN + " {}:" +
Fore.YELLOW + " Error!").format(social_network))
exists = "error"
# Save exists flag
results_site['exists'] = exists
# Save results from request
results_site['http_status'] = http_status
results_site['response_text'] = response_text
# Add this site's results into final dictionary with all of the other results.
results_total[social_network] = results_site
print((Style.BRIGHT + Fore.GREEN + "[" +
Fore.YELLOW + "*" +
Fore.GREEN + "] Saved: " +
Fore.WHITE + "{}").format(fname))
    final_score(amount, f)
    f.close()
    return results_total


def main():
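    """Parse command-line arguments and run the Sherlock report for each username."""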
# Colorama module's initialization.
init(autoreset=True)
version_string = f"%(prog)s {__version__}\n" + \
f"{requests.__description__}: {requests.__version__}\n" + \
f"Python: {platform.python_version()}"
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description=f"{module_name} (Version {__version__})"
)
parser.add_argument("--version",
action="version", version=version_string,
help="Display version information and dependencies."
)
parser.add_argument("--verbose", "-v", "-d", "--debug",
action="store_true", dest="verbose", default=False,
help="Display extra debugging information."
)
parser.add_argument("--quiet", "-q",
action="store_false", dest="verbose",
help="Disable debugging information (Default Option)."
)
parser.add_argument("--tor", "-t",
action="store_true", dest="tor", default=False,
help="Make requests over TOR; increases runtime; requires TOR to be installed and in system path.")
parser.add_argument("--unique-tor", "-u",
action="store_true", dest="unique_tor", default=False,
help="Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path.")
parser.add_argument("--csv",
action="store_true", dest="csv", default=False,
help="Create Comma-Separated Values (CSV) File."
)
parser.add_argument("--site",
action="append", metavar='SITE_NAME',
dest="site_list", default=None,
help="Limit analysis to just the listed sites. Add multiple options to specify more than one site."
)
parser.add_argument("username",
nargs='+', metavar='USERNAMES',
action="store",
help="One or more usernames to check with social networks."
)
args = parser.parse_args()
# Banner
print(Fore.WHITE + Style.BRIGHT +
""" .\"\"\"-.
/ \\
____ _ _ _ | _..--'-.
/ ___|| |__ ___ _ __| | ___ ___| |__ >.`__.-\"\"\;\"`
\___ \| '_ \ / _ \ '__| |/ _ \ / __| |/ / / /( ^\\
___) | | | | __/ | | | (_) | (__| < '-`) =|-.
|____/|_| |_|\___|_| |_|\___/ \___|_|\_\ /`--.'--' \ .-.
.'`-._ `.\ | J /
/ `--.| \__/""")
if args.tor or args.unique_tor:
print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.")
# Load the data
data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json")
with open(data_file_path, "r", encoding="utf-8") as raw:
site_data_all = json.load(raw)
    if args.site_list is None:
        # No site restriction: query the full set of supported sites.
        site_data = site_data_all
    else:
        # The user wants to run queries against a sub-set of the site list.
        # Make sure those sites are supported and build a pruned site database.
        site_data = {}
        site_missing = []
        for site in args.site_list:
            site_found = False
            for existing_site in site_data_all:
                if site.lower() == existing_site.lower():
                    site_data[existing_site] = site_data_all[existing_site]
                    site_found = True
            if not site_found:
                # Record unsupported sites for the error message below.
                site_missing.append(f"'{site}'")
        if site_missing:
            print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
            sys.exit(1)
# Run report on all specified users.
for username in args.username:
print()
results = sherlock(username, site_data, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
        if args.csv:
with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report:
writer = csv.writer(csv_report)
writer.writerow(['username',
'name',
'url_main',
'url_user',
'exists',
'http_status'
]
)
for site in results:
writer.writerow([username,
site,
results[site]['url_main'],
results[site]['url_user'],
results[site]['exists'],
results[site]['http_status']
]
)
if __name__ == "__main__":
main()
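
# Example invocation (assumes data.json sits next to this script and that the
# site names below exist in it):
#   python3 sherlock.py johndoe --csv --site Twitter --site GitHub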