Add a command-line option to run a report only on specified sites (as opposed to all of them). Move loading of the JSON file out of the query logic proper: the database and the query logic need to be kept separate for future changes anyway, so this is a first step in that refactoring. Update the README with the latest information.

This commit is contained in:
Christopher K. Hoadley 2019-01-05 22:52:53 -06:00
parent 4596f7121e
commit 33e8beb5b4
2 changed files with 50 additions and 17 deletions

View file

@ -11,7 +11,7 @@
```bash
# clone the repo
$ git clone https://github.com/sdushantha/sherlock.git
$ git clone https://github.com/TheYahya/sherlock.git
# change the working directory to sherlock
$ cd sherlock
@ -24,10 +24,11 @@ $ pip3 install -r requirements.txt
```bash
$ python3 sherlock.py --help
usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--csv] [--tor] [--unique-tor]
usage: sherlock.py [-h] [--version] [--verbose] [--quiet] [--tor]
[--unique-tor] [--csv] [--site SITE_NAME]
USERNAMES [USERNAMES ...]
Sherlock: Find Usernames Across Social Networks (Version 2018.12.30)
Sherlock: Find Usernames Across Social Networks (Version 0.2.0)
positional arguments:
USERNAMES One or more usernames to check with social networks.
@ -38,9 +39,14 @@ optional arguments:
--verbose, -v, -d, --debug
Display extra debugging information.
--quiet, -q Disable debugging information (Default Option).
--tor, -t Make requests over TOR; increases runtime; requires
TOR to be installed and in system path.
--unique-tor, -u Make requests over TOR with new TOR circuit after each
request; increases runtime; requires TOR to be
installed and in system path.
--csv Create Comma-Separated Values (CSV) File.
--tor, -t Make requests over TOR; increases runtime; requires TOR to be installed and in system path.
--unique-tor, -u Make requests over TOR with new TOR circuit after each request; increases runtime; requires TOR to be installed and in system path.
--site SITE_NAME Limit analysis to just the listed sites. Add multiple
options to specify more than one site.
```
For example, run ```python3 sherlock.py user123```, and all of the accounts

View file

@ -10,6 +10,7 @@ networks.
import csv
import json
import os
import sys
import platform
import re
from argparse import ArgumentParser, RawDescriptionHelpFormatter
@ -21,7 +22,7 @@ from requests_futures.sessions import FuturesSession
from torrequest import TorRequest
module_name = "Sherlock: Find Usernames Across Social Networks"
__version__ = "0.1.10"
__version__ = "0.2.0"
amount=0
# TODO: fix tumblr
@ -60,7 +61,7 @@ def get_response(request_future, error_type, social_network, verbose=False):
return None, ""
def sherlock(username, verbose=False, tor=False, unique_tor=False):
def sherlock(username, site_data, verbose=False, tor=False, unique_tor=False):
"""Run Sherlock Analysis.
Checks for existence of username on various social media sites.
@ -68,6 +69,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
Keyword Arguments:
username -- String indicating username that report
should be created against.
site_data -- Dictionary containing all of the site data.
verbose -- Boolean indicating whether to give verbose output.
tor -- Boolean indicating whether to use a tor circuit for the requests.
unique_tor -- Boolean indicating whether to use a new tor circuit for each request.
@ -107,13 +109,8 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0'
}
# Load the data
data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json")
with open(data_file_path, "r", encoding="utf-8") as raw:
data = json.load(raw)
# Allow 1 thread for each external service, so `len(data)` threads total
executor = ThreadPoolExecutor(max_workers=len(data))
# Allow 1 thread for each external service, so `len(site_data)` threads total
executor = ThreadPoolExecutor(max_workers=len(site_data))
# Create session based on request methodology
underlying_session = requests.session()
@ -129,7 +126,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
results_total = {}
# First create futures for all requests. This allows for the requests to run in parallel
for social_network, net_info in data.items():
for social_network, net_info in site_data.items():
# Results from analysis of this specific site
results_site = {}
@ -175,7 +172,7 @@ def sherlock(username, verbose=False, tor=False, unique_tor=False):
f = open_file(fname)
# Core logic: If tor requests, make them here. If multi-threaded requests, wait for responses
for social_network, net_info in data.items():
for social_network, net_info in site_data.items():
# Retrieve results again
results_site = results_total.get(social_network)
@ -330,6 +327,11 @@ def main():
action="store_true", dest="csv", default=False,
help="Create Comma-Separated Values (CSV) File."
)
parser.add_argument("--site",
action="append", metavar='SITE_NAME',
dest="site_list", default=None,
help="Limit analysis to just the listed sites. Add multiple options to specify more than one site."
)
parser.add_argument("username",
nargs='+', metavar='USERNAMES',
action="store",
@ -353,10 +355,35 @@ def main():
if args.tor or args.unique_tor:
print("Warning: some websites might refuse connecting over TOR, so note that using this option might increase connection errors.")
# Load the data
data_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data.json")
with open(data_file_path, "r", encoding="utf-8") as raw:
site_data_all = json.load(raw)
if args.site_list is None:
# Not desired to look at a sub-set of sites
site_data = site_data_all
else:
# User desires to selectively run queries on a sub-set of the site list.
# Make sure that the sites are supported & build up pruned site database.
site_data = {}
site_missing = []
for site in args.site_list:
if site in site_data_all:
site_data[site] = site_data_all[site]
else:
# Build up list of sites not supported for future error message.
site_missing.append(f"'{site}'")
if site_missing != []:
print(f"Error: Desired sites not found: {', '.join(site_missing)}.")
sys.exit(1)
# Run report on all specified users.
for username in args.username:
print()
results = sherlock(username, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
results = sherlock(username, site_data, verbose=args.verbose, tor=args.tor, unique_tor=args.unique_tor)
if args.csv == True:
with open(username + ".csv", "w", newline='', encoding="utf-8") as csv_report: