2018-12-30 20:25:04 +00:00
|
|
|
"""
|
|
|
|
Sherlock: Find Usernames Across Social Networks Module
|
2018-12-27 03:54:25 +00:00
|
|
|
|
|
|
|
This module contains the main logic to search for usernames at social
|
|
|
|
networks.
|
|
|
|
"""
|
2018-12-30 05:10:31 +00:00
|
|
|
|
2018-12-31 06:27:54 +00:00
|
|
|
import csv
|
2018-12-24 14:31:34 +00:00
|
|
|
import json
|
|
|
|
import os
|
2018-12-31 06:27:54 +00:00
|
|
|
import platform
|
2018-12-27 01:41:11 +00:00
|
|
|
import re
|
2018-12-28 19:15:41 +00:00
|
|
|
from argparse import ArgumentParser, RawDescriptionHelpFormatter
|
2018-12-31 06:27:54 +00:00
|
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
from requests_futures.sessions import FuturesSession
|
2018-12-29 00:50:25 +00:00
|
|
|
from torrequest import TorRequest
|
2018-12-24 14:31:34 +00:00
|
|
|
|
2018-12-27 03:54:25 +00:00
|
|
|
# Human-readable program title; main() uses it in the argparse description.
module_name = "Sherlock: Find Usernames Across Social Networks"
# Release identifier; main() shows it in the --version output and the
# argparse description.
__version__ = "2018.12.30"
|
2018-12-25 19:19:21 +00:00
|
|
|
|
2018-12-26 08:22:37 +00:00
|
|
|
# TODO: fix tumblr
|
2018-12-25 19:19:21 +00:00
|
|
|
|
2018-12-25 18:14:39 +00:00
|
|
|
|
2018-12-24 14:31:34 +00:00
|
|
|
def write_to_file(url, fname):
    """Append a single URL, followed by a newline, to a text file.

    Keyword Arguments:
    url                    -- String with the URL to record.
    fname                  -- Path of the file to append to. The file is
                              created if it does not exist yet.

    Return Value:
    None.
    """
    # The URL embeds the user-supplied username, which may contain non-ASCII
    # characters. An explicit encoding keeps the write from failing (or
    # producing locale-dependent bytes) on platforms whose default encoding
    # is not UTF-8, and matches how data.json is read in this module.
    with open(fname, "a", encoding="utf-8") as f:
        f.write(url + "\n")
|
2018-12-24 14:31:34 +00:00
|
|
|
|
|
|
|
|
2018-12-30 05:10:31 +00:00
|
|
|
def print_error(err, errstr, var, debug=False):
    """Print one colourised error line to standard output.

    Keyword Arguments:
    err                    -- Object describing the failure (typically the
                              caught exception). Only shown when debug is
                              true.
    errstr                 -- String label printed before the detail,
                              e.g. "HTTP Error:".
    var                    -- Value shown instead of err when debug is
                              false (callers pass the site name).
    debug                  -- Boolean selecting the detailed (err) output
                              over the short (var) output.

    Return Value:
    None.
    """
    # Both variants share the same prefix; only the trailing detail differs.
    detail = err if debug else var
    print(f"\033[37;1m[\033[91;1m-\033[37;1m]\033[91;1m {errstr}\033[93;1m {detail}")
|
|
|
|
|
2018-12-26 17:25:28 +00:00
|
|
|
|
2018-12-30 05:10:31 +00:00
|
|
|
def get_response(request_future, error_type, social_network, verbose=False):
|
2018-12-25 16:01:01 +00:00
|
|
|
try:
|
2018-12-30 05:10:31 +00:00
|
|
|
rsp = request_future.result()
|
2018-12-29 01:31:31 +00:00
|
|
|
if rsp.status_code:
|
|
|
|
return rsp, error_type
|
2018-12-25 16:01:01 +00:00
|
|
|
except requests.exceptions.HTTPError as errh:
|
2018-12-27 03:54:25 +00:00
|
|
|
print_error(errh, "HTTP Error:", social_network, verbose)
|
2018-12-25 16:01:01 +00:00
|
|
|
except requests.exceptions.ConnectionError as errc:
|
2018-12-27 03:54:25 +00:00
|
|
|
print_error(errc, "Error Connecting:", social_network, verbose)
|
2018-12-25 16:01:01 +00:00
|
|
|
except requests.exceptions.Timeout as errt:
|
2018-12-27 03:54:25 +00:00
|
|
|
print_error(errt, "Timeout Error:", social_network, verbose)
|
2018-12-25 16:01:01 +00:00
|
|
|
except requests.exceptions.RequestException as err:
|
2018-12-27 03:54:25 +00:00
|
|
|
print_error(err, "Unknown error:", social_network, verbose)
|
2018-12-25 16:01:01 +00:00
|
|
|
return None, ""
|
2018-12-24 14:31:34 +00:00
|
|
|
|
|
|
|
|
2018-12-29 01:31:31 +00:00
|
|
|
def sherlock(username, verbose=False, tor=False, unique_tor=False):
    """Run Sherlock Analysis.

    Checks for existence of username on various social media sites.

    All requests are started first (one worker thread per site) and their
    results are evaluated afterwards. URLs of accounts that were found are
    appended to "<username>.txt" in the current working directory; a file of
    that name left over from an earlier run is removed first. The site
    definitions are read from "data.json" in the current working directory.

    Keyword Arguments:
    username               -- String indicating username that report
                              should be created against.
    verbose                -- Boolean indicating whether to give verbose output.
    tor                    -- Boolean indicating whether to use a tor circuit for the requests.
    unique_tor             -- Boolean indicating whether to use a new tor circuit for each request.

    Return Value:
    Dictionary containing results from report.  Key of dictionary is the name
    of the social network site, and the value is another dictionary with
    the following keys:
        url_main:      URL of main site.
        url_user:      URL of user on site (if account exists). Empty string
                       when the username is not legal for the site.
        exists:        String indicating results of test for account
                       existence: "yes", "no", "illegal" or "error".
        http_status:   HTTP status code of query which checked for existence on
                       site. "?" if the request failed, empty string if no
                       request was made.
        response_text: Text that came back from request.  Empty string if
                       there was an HTTP error when checking for existence
                       or no request was made.
    """
    fname = username + ".txt"

    # Found URLs are appended one at a time, so start from a clean file.
    if os.path.isfile(fname):
        os.remove(fname)
        print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Removing previous file:\033[1;37m {}\033[0m".format(fname))

    print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Checking username\033[0m\033[1;37m {}\033[0m\033[1;92m on: \033[0m".format(username))

    # A user agent is needed because some sites don't
    # return the correct information since they think that
    # we are bots
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0'
    }

    # Load the data
    with open("data.json", "r", encoding="utf-8") as raw:
        data = json.load(raw)

    # Allow 1 thread for each external service, so `len(data)` threads total.
    # ThreadPoolExecutor rejects max_workers=0, so keep at least one worker
    # even when the data file is empty.
    executor = ThreadPoolExecutor(max_workers=max(len(data), 1))

    # Create session based on request methodology
    if tor or unique_tor:
        underlying_request = TorRequest()
        # TorRequest exposes its proxied requests session as an attribute;
        # calling it would try to call the Session object itself.
        underlying_session = underlying_request.session
    else:
        underlying_request = None
        underlying_session = requests.session()

    # Create multi-threaded session for all requests
    session = FuturesSession(executor=executor, session=underlying_session)

    # Results from analysis of all sites
    results_total = {}

    # Pending request for every site that is actually queried
    request_futures = {}

    # First create futures for all requests. This allows for the requests to run in parallel
    for social_network, net_info in data.items():

        # Results from analysis of this specific site
        results_site = {}

        # Record URL of main site
        results_site['url_main'] = net_info.get("urlMain")

        # Don't make request if username is invalid for the site
        regex_check = net_info.get("regexCheck")
        if regex_check and re.search(regex_check, username) is None:
            # No need to do the check at the site: this user name is not allowed.
            print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Illegal Username Format For This Site!".format(social_network))
            results_site["exists"] = "illegal"
            # Fill the remaining keys so every site entry has the same shape
            # (consumers such as the CSV report index them unconditionally).
            results_site["url_user"] = ""
            results_site["http_status"] = ""
            results_site["response_text"] = ""
        else:
            # URL of user on site (if it exists)
            url = net_info["url"].format(username)
            results_site["url_user"] = url

            # This future starts running the request in a new thread, doesn't block the main thread
            request_futures[social_network] = session.get(url=url, headers=headers)

            # Reset identity for tor (if needed)
            if unique_tor:
                underlying_request.reset_identity()

        # Add this site's results into final dictionary with all of the other results.
        results_total[social_network] = results_site

    # Core logic: wait for each response and decide whether the account exists.
    # Iterate over items() so that net_info always belongs to the site being
    # evaluated (reusing the variable left over from the loop above would
    # apply the last site's rules to every site).
    for social_network, net_info in data.items():

        # Retrieve results again
        results_site = results_total[social_network]

        if results_site.get("exists") is not None:
            # We have already determined the user doesn't exist here
            continue

        url = results_site.get("url_user")

        # Retrieve future and ensure it has finished. On failure
        # get_response() returns (None, "").
        r, error_type = get_response(request_future=request_futures[social_network],
                                     error_type=net_info["errorType"],
                                     social_network=social_network,
                                     verbose=verbose)

        # Default data in case there are any failures in doing a request.
        http_status = "?"
        response_text = ""

        # Attempt to get request information
        if r is not None:
            http_status = r.status_code
            try:
                response_text = r.text.encode(r.encoding)
            except (TypeError, LookupError, UnicodeError):
                # Encoding unknown (None), unsupported, or unable to
                # represent the text: keep the empty default.
                pass

        # Decide whether the account was found; None means "could not tell".
        found = None
        if error_type == "message":
            # Checks if the error message is in the HTML
            found = net_info.get("errorMsg") not in r.text
        elif error_type == "status_code":
            # Checks if the status code of the response is 404
            found = r.status_code != 404
        elif error_type == "response_url":
            # Checks if the redirect url is the same as the one defined in data.json
            found = net_info.get("errorUrl") not in r.url

        if found is None:
            # Request failed, or the site declares a detection method this
            # function does not know about.
            print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Error!".format(social_network))
            exists = "error"
        elif found:
            print("\033[37;1m[\033[92;1m+\033[37;1m]\033[92;1m {}:\033[0m".format(social_network), url)
            write_to_file(url, fname)
            exists = "yes"
        else:
            print("\033[37;1m[\033[91;1m-\033[37;1m]\033[92;1m {}:\033[93;1m Not Found!".format(social_network))
            exists = "no"

        # Save exists flag
        results_site['exists'] = exists

        # Save results from request
        results_site['http_status'] = http_status
        results_site['response_text'] = response_text

        # Add this site's results into final dictionary with all of the other results.
        results_total[social_network] = results_site

    print("\033[1;92m[\033[0m\033[1;77m*\033[0m\033[1;92m] Saved: \033[37;1m{}\033[0m".format(fname))

    return results_total
|
2018-12-27 03:54:25 +00:00
|
|
|
|
|
|
|
|
2018-12-28 19:15:41 +00:00
|
|
|
def main():
    """Command-line entry point.

    Parses the command-line arguments, prints the banner, and runs
    sherlock() once for every username given. With --csv, the results for
    each username are additionally written to "<username>.csv" in the
    current working directory.

    Return Value:
    None.
    """
    version_string = (f"%(prog)s {__version__}\n"
                      f"{requests.__description__}:  {requests.__version__}\n"
                      f"Python:  {platform.python_version()}")

    parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
                            description=f"{module_name} (Version {__version__})"
                            )
    parser.add_argument("--version",
                        action="version", version=version_string,
                        help="Display version information and dependencies."
                        )
    parser.add_argument("--verbose", "-v", "-d", "--debug",
                        action="store_true", dest="verbose", default=False,
                        help="Display extra debugging information."
                        )
    parser.add_argument("--quiet", "-q",
                        action="store_false", dest="verbose",
                        help="Disable debugging information (Default Option)."
                        )
    parser.add_argument("--tor", "-t",
                        action="store_true", dest="tor", default=False,
                        help="Make requests over TOR; increases runtime; requires TOR to be installed and in system path.")
    parser.add_argument("--unique-tor", "-u",
                        action="store_true", dest="unique_tor", default=False,
                        help="Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path.")
    parser.add_argument("--csv",
                        action="store_true", dest="csv", default=False,
                        help="Create Comma-Separated Values (CSV) File."
                        )
    parser.add_argument("username",
                        nargs='+', metavar='USERNAMES',
                        action="store",
                        help="One or more usernames to check with social networks."
                        )

    args = parser.parse_args()

    # Banner. Literal backslashes of the ASCII art are written as "\\" so the
    # literal contains no invalid escape sequences; the printed text is
    # unchanged.
    print(
        """\033[37;1m .\"\"\"-.
\033[37;1m / \\
\033[37;1m ____ _ _ _ | _..--'-.
\033[37;1m/ ___|| |__ ___ _ __| | ___ ___| |__ >.`__.-\"\"\\;\"`
\033[37;1m\\___ \\| '_ \\ / _ \\ '__| |/ _ \\ / __| |/ / / /( ^\\
\033[37;1m ___) | | | | __/ | | | (_) | (__| < '-`) =|-.
\033[37;1m|____/|_| |_|\\___|_| |_|\\___/ \\___|_|\\_\\ /`--.'--' \\ .-.
\033[37;1m .'`-._ `.\\ | J /
\033[37;1m / `--.| \\__/\033[0m""")

    if args.tor or args.unique_tor:
        print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.")

    # Run report on all specified users.
    for username in args.username:
        print()
        results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)

        if not args.csv:
            continue

        # newline='' is what the csv module expects for files it writes;
        # the explicit encoding keeps non-ASCII usernames writable on every
        # platform.
        with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report:
            writer = csv.writer(csv_report)
            writer.writerow(['username',
                             'name',
                             'url_main',
                             'url_user',
                             'exists',
                             'http_status'
                             ])
            for site, site_result in results.items():
                # Not every site entry is guaranteed to carry every key
                # (e.g. no request is made when the username is illegal for
                # a site), so missing values become empty cells instead of
                # raising KeyError.
                writer.writerow([username,
                                 site,
                                 site_result.get('url_main', ""),
                                 site_result.get('url_user', ""),
                                 site_result.get('exists', ""),
                                 site_result.get('http_status', "")
                                 ])
|
2018-12-24 14:31:34 +00:00
|
|
|
|
2018-12-28 19:15:41 +00:00
|
|
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|