Adopt tox and pytest

This commit is contained in:
Paul Pfeister 2024-05-20 04:44:52 -04:00
parent 85ec59e255
commit a785a5931f
No known key found for this signature in database
GPG key ID: 70D33A96CBD7A994
14 changed files with 254 additions and 484 deletions

View file

@ -53,5 +53,8 @@ pandas = ">=1.0.0,<3.0.0"
openpyxl = "^3.0.10"
exrex = "^0.11.0"
[tool.poetry.group.dev.dependencies]
jsonschema = "^4.0.0"
[tool.poetry.scripts]
sherlock = 'sherlock.sherlock:main'

View file

@ -21,23 +21,25 @@ import requests
# Removing __version__ here will trigger update message for users
# Do not remove until ready to trigger that message
# When removed, also remove all the noqa: E402 comments for linting
__version__ = "0.14.4"
del __version__
from .__init__ import (
from .__init__ import ( # noqa: E402
__shortname__,
__longname__,
__version__
)
from requests_futures.sessions import FuturesSession
from torrequest import TorRequest
from sherlock.result import QueryStatus
from sherlock.result import QueryResult
from sherlock.notify import QueryNotifyPrint
from sherlock.sites import SitesInformation
from colorama import init
from argparse import ArgumentTypeError
from requests_futures.sessions import FuturesSession # noqa: E402
from torrequest import TorRequest # noqa: E402
from sherlock.result import QueryStatus # noqa: E402
from sherlock.result import QueryResult # noqa: E402
from sherlock.notify import QueryNotify # noqa: E402
from sherlock.notify import QueryNotifyPrint # noqa: E402
from sherlock.sites import SitesInformation # noqa: E402
from colorama import init # noqa: E402
from argparse import ArgumentTypeError # noqa: E402
class SherlockFuturesSession(FuturesSession):
@ -166,9 +168,9 @@ def multiple_usernames(username):
def sherlock(
username,
site_data,
query_notify,
tor=False,
unique_tor=False,
query_notify: QueryNotify,
tor: bool = False,
unique_tor: bool = False,
proxy=None,
timeout=60,
):

View file

@ -1,4 +0,0 @@
"""Sherlock Tests
This package contains various submodules used to run tests.
"""

View file

@ -1,213 +0,0 @@
"""Sherlock Tests
This module contains various tests.
"""
from tests.base import SherlockBaseTest
import exrex
class SherlockDetectTests(SherlockBaseTest):
def test_detect_true_via_message(self):
"""Test Username Does Exist (Via Message).
This test ensures that the "message" detection mechanism of
ensuring that a Username does exist works properly.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
site = "AllMyLinks"
site_data = self.site_data_all[site]
# Ensure that the site's detection method has not changed.
self.assertEqual("message", site_data["errorType"])
self.username_check([site_data["username_claimed"]], [site], exist_check=True)
return
def test_detect_false_via_message(self):
"""Test Username Does Not Exist (Via Message).
This test ensures that the "message" detection mechanism of
ensuring that a Username does *not* exist works properly.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
site = "AllMyLinks"
site_data = self.site_data_all[site]
# Ensure that the site's detection method has not changed.
self.assertEqual("message", site_data["errorType"])
# Generate a valid username based on the regex for a username that the
# site supports that is *most likely* not taken. The regex is slightly
# modified version of site_data["regexCheck"] as we want a username
# that has the maximum length that is supported by the site. This way,
# we wont generate a random username that might actually exist. This
# method is very hacky, but it does the job as having hardcoded
# usernames that dont exists will lead to people with ill intent to
# create an account with that username which will break the tests
valid_username = exrex.getone(r"^[a-z0-9][a-z0-9-]{32}$")
self.username_check([valid_username], [site], exist_check=False)
return
def test_detect_true_via_status_code(self):
"""Test Username Does Exist (Via Status Code).
This test ensures that the "status code" detection mechanism of
ensuring that a Username does exist works properly.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
site = "BitBucket"
site_data = self.site_data_all[site]
# Ensure that the site's detection method has not changed.
self.assertEqual("status_code", site_data["errorType"])
self.username_check([site_data["username_claimed"]], [site], exist_check=True)
return
def test_detect_false_via_status_code(self):
"""Test Username Does Not Exist (Via Status Code).
This test ensures that the "status code" detection mechanism of
ensuring that a Username does *not* exist works properly.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
site = "BitBucket"
site_data = self.site_data_all[site]
# Ensure that the site's detection method has not changed.
self.assertEqual("status_code", site_data["errorType"])
# Generate a valid username based on the regex for a username that the
# site supports that is *most likely* not taken. The regex is slightly
# modified version of site_data["regexCheck"] as we want a username
# that has the maximum length that is supported by the site. This way,
# we wont generate a random username that might actually exist. This
# method is very hacky, but it does the job as having hardcoded
# usernames that dont exists will lead to people with ill intent to
# create an account with that username which will break the tests
valid_username = exrex.getone(r"^[a-zA-Z0-9-_]{30}")
self.username_check([valid_username], [site], exist_check=False)
return
class SherlockSiteCoverageTests(SherlockBaseTest):
def test_coverage_false_via_status(self):
"""Test Username Does Not Exist Site Coverage (Via HTTP Status).
This test checks all sites with the "HTTP Status" detection mechanism
to ensure that a Username that does not exist is reported that way.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
self.detect_type_check("status_code", exist_check=False)
return
def test_coverage_true_via_status(self):
"""Test Username Does Exist Site Coverage (Via HTTP Status).
This test checks all sites with the "HTTP Status" detection mechanism
to ensure that a Username that does exist is reported that way.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
self.detect_type_check("status_code", exist_check=True)
return
def test_coverage_false_via_message(self):
"""Test Username Does Not Exist Site Coverage (Via Error Message).
This test checks all sites with the "Error Message" detection mechanism
to ensure that a Username that does not exist is reported that way.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
self.detect_type_check("message", exist_check=False)
return
def test_coverage_true_via_message(self):
"""Test Username Does Exist Site Coverage (Via Error Message).
This test checks all sites with the "Error Message" detection mechanism
to ensure that a Username that does exist is reported that way.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if detection mechanism did not work as expected.
"""
self.detect_type_check("message", exist_check=True)
return
def test_coverage_total(self):
"""Test Site Coverage Is Total.
This test checks that all sites have test data available.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Will trigger an assert if we do not have total coverage.
"""
self.coverage_total_check()
return

View file

@ -1,224 +0,0 @@
"""Sherlock Base Tests
This module contains various utilities for running tests.
"""
import os
import os.path
import unittest
from sherlock import sherlock
from sherlock.result import QueryStatus
from sherlock.notify import QueryNotify
from sherlock.sites import SitesInformation
import warnings
class SherlockBaseTest(unittest.TestCase):
def setUp(self):
"""Sherlock Base Test Setup.
Does common setup tasks for base Sherlock tests.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
"""
# This ignores the ResourceWarning from an unclosed SSLSocket.
# TODO: Figure out how to fix the code so this is not needed.
warnings.simplefilter("ignore", ResourceWarning)
# Create object with all information about sites we are aware of.
sites = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json"))
# Create original dictionary from SitesInformation() object.
# Eventually, the rest of the code will be updated to use the new object
# directly, but this will glue the two pieces together.
site_data_all = {}
for site in sites:
site_data_all[site.name] = site.information
self.site_data_all = site_data_all
# Load excluded sites list, if any
excluded_sites_path = os.path.join(os.path.dirname(os.path.realpath(sherlock.__file__)), "tests/.excluded_sites")
try:
with open(excluded_sites_path, "r", encoding="utf-8") as excluded_sites_file:
self.excluded_sites = excluded_sites_file.read().splitlines()
except FileNotFoundError:
self.excluded_sites = []
# Create notify object for query results.
self.query_notify = QueryNotify()
self.tor = False
self.unique_tor = False
self.timeout = None
self.skip_error_sites = True
return
def site_data_filter(self, site_list):
"""Filter Site Data.
Keyword Arguments:
self -- This object.
site_list -- List of strings corresponding to sites which
should be filtered.
Return Value:
Dictionary containing sub-set of site data specified by "site_list".
"""
# Create new dictionary that has filtered site data based on input.
# Note that any site specified which is not understood will generate
# an error.
site_data = {}
for site in site_list:
with self.subTest(f"Checking test vector Site '{site}' "
f"exists in total site data."
):
site_data[site] = self.site_data_all[site]
return site_data
def username_check(self, username_list, site_list, exist_check=True):
"""Username Exist Check.
Keyword Arguments:
self -- This object.
username_list -- List of strings corresponding to usernames
which should exist on *all* of the sites.
site_list -- List of strings corresponding to sites which
should be filtered.
exist_check -- Boolean which indicates if this should be
a check for Username existence,
or non-existence.
Return Value:
Nothing.
Will trigger an assert if Username does not have the expected
existence state.
"""
# Filter all site data down to just what is needed for this test.
site_data = self.site_data_filter(site_list)
if exist_check:
check_type_text = "claimed"
exist_result_desired = QueryStatus.CLAIMED
else:
check_type_text = "available"
exist_result_desired = QueryStatus.AVAILABLE
for username in username_list:
results = sherlock.sherlock(username,
site_data,
self.query_notify,
tor=self.tor,
unique_tor=self.unique_tor,
timeout=self.timeout
)
for site, result in results.items():
with self.subTest(f"Checking Username '{username}' "
f"{check_type_text} on Site '{site}'"
):
if (
(self.skip_error_sites == True) and
(result["status"].status == QueryStatus.UNKNOWN)
):
#Some error connecting to site.
self.skipTest(f"Skipping Username '{username}' "
f"{check_type_text} on Site '{site}': "
f"Site returned error status."
)
self.assertEqual(exist_result_desired,
result["status"].status)
return
def detect_type_check(self, detect_type, exist_check=True):
"""Username Exist Check.
Keyword Arguments:
self -- This object.
detect_type -- String corresponding to detection algorithm
which is desired to be tested.
Note that only sites which have documented
usernames which exist and do not exist
will be tested.
exist_check -- Boolean which indicates if this should be
a check for Username existence,
or non-existence.
Return Value:
Nothing.
Runs tests on all sites using the indicated detection algorithm
and which also has test vectors specified.
Will trigger an assert if Username does not have the expected
existence state.
"""
# Dictionary of sites that should be tested for having a username.
# This will allow us to test sites with a common username in parallel.
sites_by_username = {}
for site, site_data in self.site_data_all.items():
if (
(site in self.excluded_sites) or
(site_data["errorType"] != detect_type) or
(site_data.get("username_claimed") is None) or
(site_data.get("username_unclaimed") is None)
):
# This is either not a site we are interested in, or the
# site does not contain the required information to do
# the tests.
pass
else:
# We should run a test on this site.
# Figure out which type of user
if exist_check:
username = site_data.get("username_claimed")
else:
username = site_data.get("username_unclaimed")
# Add this site to the list of sites corresponding to this
# username.
if username in sites_by_username:
sites_by_username[username].append(site)
else:
sites_by_username[username] = [site]
# Check on the username availability against all of the sites.
for username, site_list in sites_by_username.items():
self.username_check([username],
site_list,
exist_check=exist_check
)
return
def coverage_total_check(self):
"""Total Coverage Check.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
Counts up all Sites with full test data available.
Will trigger an assert if any Site does not have test coverage.
"""
site_no_tests_list = []
for site, site_data in self.site_data_all.items():
if site_data.get("username_claimed") is None:
# Test information not available on this site.
site_no_tests_list.append(site)
self.assertEqual("", ", ".join(site_no_tests_list))
return

14
tests/conftest.py Normal file
View file

@ -0,0 +1,14 @@
import os
import pytest
from sherlock.sites import SitesInformation
@pytest.fixture()
def sites_obj():
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json"))
yield sites_obj
@pytest.fixture(scope="session")
def sites_info():
sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json"))
sites_iterable = {site.name: site.information for site in sites_obj}
yield sites_iterable

7
tests/few_test_basic.py Normal file
View file

@ -0,0 +1,7 @@
import sherlock
#from sherlock.sites import SitesInformation
#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json")
def test_username_via_message():
sherlock.__main__("--version")

View file

@ -0,0 +1,23 @@
import os
import re
import subprocess
class Interactives:
def run_cli(args: str = "") -> str:
command = [f"sherlock {args}"]
proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
return proc_out.decode()
def walk_sherlock_for_files_with(pattern: str) -> list[str]:
pattern: re.Pattern = re.compile(pattern)
matching_files: list[str] = []
for root, dirs, files in os.walk("sherlock"):
for file in files:
file_path = os.path.join(root,file)
if "__pycache__" in file_path:
continue
with open(file_path, 'r', errors='ignore') as f:
if pattern.search(f.read()):
matching_files.append(file_path)
return matching_files

28
tests/test_manifest.py Normal file
View file

@ -0,0 +1,28 @@
import os
import json
import pytest
from jsonschema import validate
def validate_json(jsonfile: str, schemafile: str) -> bool:
with open(jsonfile, 'r') as f:
jsondat = json.load(f)
with open(schemafile, 'r') as f:
schemadat = json.load(f)
validate(instance=jsondat, schema=schemadat)
return True
def test_validate_manifest_against_schema():
json_relative: str = '../sherlock/resources/data.json'
schema_relative: str = '../sherlock/resources/data.schema.json'
json_path: str = os.path.join(os.path.dirname(__file__), json_relative)
schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative)
validate_json(jsonfile=json_path, schemafile=schema_path)
# Ensure that the expected values are beind returned by the site list
@pytest.mark.parametrize("target_name,target_expected_err_type", [
('GitHub', 'status_code'),
('GitLab', 'message'),
])
def test_site_list_iterability (sites_info, target_name, target_expected_err_type):
assert sites_info[target_name]['errorType'] == target_expected_err_type

View file

@ -1,28 +0,0 @@
import unittest
import sys
sys.path.append('../')
from sherlock import sherlock as sh
checksymbols = []
checksymbols = ["_", "-", "."]
"""Test for multiple usernames.
This test ensures that the function multiple_usernames works properly. More specific,
different scenarios are tested and only usernames that contain this specific sequence: {?}
should return positive.
Keyword Arguments:
self -- This object.
Return Value:
Nothing.
"""
class TestMultipleUsernames(unittest.TestCase):
def test_area(self):
test_usernames = ["test{?}test" , "test{?feo" , "test"]
for name in test_usernames:
if(sh.check_for_parameter(name)):
self.assertAlmostEqual(sh.multiple_usernames(name), ["test_test" , "test-test" , "test.test"])
else:
self.assertAlmostEqual(name, name)

102
tests/test_probes.py Normal file
View file

@ -0,0 +1,102 @@
import pytest
import random
import string
import re
from sherlock.sherlock import sherlock
from sherlock.notify import QueryNotify
from sherlock.result import QueryStatus
#from sherlock_interactives import Interactives
def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus:
query_notify = QueryNotify()
site_data: dict = {}
site_data[site] = sites_info[site]
return sherlock(
username=username,
site_data=site_data,
query_notify=query_notify,
)[site]['status'].status
# Known positives should only use sites trusted to be reliable and unchanging
@pytest.mark.parametrize('site,username',[
('GitLab', 'ppfeister'),
('AllMyLinks', 'blue'),
])
def test_known_positives_via_message(sites_info, site, username):
assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
# Known positives should only use sites trusted to be reliable and unchanging
@pytest.mark.parametrize('site,username',[
('GitHub', 'ppfeister'),
('GitHub', 'sherlock-project'),
('Docker Hub', 'ppfeister'),
('Docker Hub', 'sherlock'),
])
def test_known_positives_via_status_code(sites_info, site, username):
assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
# Known positives should only use sites trusted to be reliable and unchanging
@pytest.mark.parametrize('site,username',[
('BodyBuilding', 'blue'),
('labpentestit', 'CSV'),
])
def test_known_positives_via_response_url(sites_info, site, username):
assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED
# Randomly generate usernames of high length and test for positive availability
# Randomly generated usernames should be simple alnum for simplicity and high
# compatibility. Several attempts may be made ~just in case~ a real username is
# generated.
@pytest.mark.parametrize('site,random_len',[
('GitLab', 255),
('Codecademy', 30)
])
def test_likely_negatives_via_message(sites_info, site, random_len):
num_attempts: int = 3
attempted_usernames: list[str] = []
status: QueryStatus = QueryStatus.CLAIMED
for i in range(num_attempts):
acceptable_types = string.ascii_letters + string.digits
random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len))
attempted_usernames.append(random_handle)
status = simple_query(sites_info=sites_info, site=site, username=random_handle)
if status is QueryStatus.AVAILABLE:
break
assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}."
# Randomly generate usernames of high length and test for positive availability
# Randomly generated usernames should be simple alnum for simplicity and high
# compatibility. Several attempts may be made ~just in case~ a real username is
# generated.
@pytest.mark.parametrize('site,random_len',[
('GitHub', 39),
('Docker Hub', 30)
])
def test_likely_negatives_via_status_code(sites_info, site, random_len):
num_attempts: int = 3
attempted_usernames: list[str] = []
status: QueryStatus = QueryStatus.CLAIMED
for i in range(num_attempts):
acceptable_types = string.ascii_letters + string.digits
random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len))
attempted_usernames.append(random_handle)
status = simple_query(sites_info=sites_info, site=site, username=random_handle)
if status is QueryStatus.AVAILABLE:
break
assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}."
def test_username_illegal_regex(sites_info):
site: str = 'BitBucket'
invalid_handle: str = '*#$Y&*JRE'
pattern = re.compile(sites_info[site]['regexCheck'])
# Ensure that the username actually fails regex before testing sherlock
assert pattern.match(invalid_handle) is None
assert simple_query(sites_info=sites_info, site=site, username=invalid_handle) is QueryStatus.ILLEGAL

38
tests/test_ux.py Normal file
View file

@ -0,0 +1,38 @@
import pytest
from sherlock import sherlock
def test_remove_nsfw(sites_obj):
nsfw_target: str = 'Pornhub'
assert nsfw_target in {site.name: site.information for site in sites_obj}
sites_obj.remove_nsfw_sites()
assert nsfw_target not in {site.name: site.information for site in sites_obj}
# Parametrized sites should *not* include Motherless, which is acting as the control
@pytest.mark.parametrize('nsfwsites', [
['Pornhub'],
['Pornhub', 'Xvideos'],
])
def test_nsfw_explicit_selection(sites_obj, nsfwsites):
for site in nsfwsites:
assert site in {site.name: site.information for site in sites_obj}
sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites)
for site in nsfwsites:
assert site in {site.name: site.information for site in sites_obj}
assert 'Motherless' not in {site.name: site.information for site in sites_obj}
def test_wildcard_username_expansion():
assert sherlock.check_for_parameter('test{?}test') is True
assert sherlock.check_for_parameter('test{.}test') is False
assert sherlock.check_for_parameter('test{}test') is False
assert sherlock.multiple_usernames('test{?}test') == ["test_test" , "test-test" , "test.test"]
#def test_area(self):
# test_usernames = ["test{?}test" , "test{?feo" , "test"]
# for name in test_usernames:
# if(sh.check_for_parameter(name)):
# self.assertAlmostEqual(sh.multiple_usernames(name), ["test_test" , "test-test" , "test.test"])
# else:
# self.assertAlmostEqual(name, name)

9
tests/test_version.py Normal file
View file

@ -0,0 +1,9 @@
from sherlock_interactives import Interactives
import sherlock
def test_versioning() -> None:
# Ensure __version__ matches version presented to the user
assert sherlock.__version__ in Interactives.run_cli("--version")
# Ensure __init__ is single source of truth for __version__ in package
# Temporarily allows sherlock.py so as to not trigger early upgrades
assert Interactives.walk_sherlock_for_files_with(r'__version__ *= *') == [ "sherlock/__init__.py", "sherlock/sherlock.py" ]

19
tox.ini
View file

@ -11,10 +11,15 @@ envlist =
py38
[testenv]
allowlist_externals = poetry
description = Attempt to build and install the package
deps =
coverage
jsonschema
pytest
allowlist_externals = coverage
commands =
poetry plugin add poetry-version-plugin
poetry install --no-root --with dev
coverage run --source=sherlock --module pytest -v
coverage report --show-missing
[testenv:lint]
description = Lint with Ruff
@ -23,3 +28,11 @@ deps =
commands =
ruff check
[gh]
python =
3.13 = py31
3.12 = py312
3.11 = py311
3.10 = py310
3.9 = py39
3.8 = py38