sherlock/load_proxies.py

import csv
import requests
import time
from collections import namedtuple
from colorama import Fore, Style

"""
A function which loads proxies from a .csv file, to a list.

Inputs: path to .csv file which contains proxies, described by fields: 'ip', 'port', 'protocol'.

Outputs: list containing proxies stored in named tuples.
"""


def load_proxies_from_csv(path_to_list):
    Proxy = namedtuple('Proxy', ['ip', 'port', 'protocol'])

    with open(path_to_list, 'r') as csv_file:
        csv_reader = csv.DictReader(csv_file)
        proxies = [Proxy(line['ip'],line['port'],line['protocol']) for line in csv_reader]

    return proxies


"""
A function which test the proxy by attempting 
to make a request to the designated website.

We use 'wikipedia.org' as a test, since we can test the proxy anonymity 
by check if the returning 'X-Client-IP' header matches the proxy ip.
"""


def check_proxy(proxy_ip, proxy_port, protocol):
    full_proxy = f'{protocol}://{proxy_ip}:{proxy_port}'
    proxies = {'http': full_proxy, 'https': full_proxy}
    try:
        r = requests.get('https://www.wikipedia.org',proxies=proxies, timeout=4)
        return_proxy = r.headers['X-Client-IP']
        if proxy_ip==return_proxy:
            return True
        else:
            return False
    except Exception:
        return False


"""
A function which takes in one mandatory argument -> a proxy list in
the format returned by the function 'load_proxies_from_csv'.
    
It also takes an optional argument 'max_proxies', if the user wishes to
cap the number of validated proxies.

Each proxy is tested by the check_proxy function. Since each test is done on 
'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules, 
but are sending successive requests each separated by at least 1 sec.

Outputs: list containing proxies stored in named tuples. 
"""


from colorama import Fore, Style

def check_proxy_list(proxy_list, max_proxies=None):
    print((Style.BRIGHT + Fore.GREEN + "[" +
           Fore.YELLOW + "*" +
           Fore.GREEN + "] Started checking proxies."))
    working_proxies = []

    # If the user has limited the number of proxies we need,
    # the function will stop when the working_proxies
    # loads the max number of requested proxies.
    if max_proxies != None:
        for proxy in proxy_list:
            if len(working_proxies) < max_proxies:
                time.sleep(1)
                if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True:
                    working_proxies.append(proxy)
            else:
                break
    else:
        for proxy in proxy_list:
            time.sleep(1)
            if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True:
                working_proxies.append(proxy)

    if len(working_proxies) > 0:
        print((Style.BRIGHT + Fore.GREEN + "[" +
               Fore.YELLOW + "*" +
               Fore.GREEN + "] Finished checking proxies."))
        return working_proxies

    else:
        raise Exception("Found no working proxies.")
Created new file 'load_proxies.py' to store functions for reading proxies from files, and checking proxy anonimity. Created the function 'load_proxies_from_csv' which reads proxies from a .csv file to a list of named tuples. 2019-01-20 17:59:33 +00:00			`import csv`
			`import requests`
			`import time`
			`from collections import namedtuple`
Function 'check_proxy_list' which checks anonimity of each proxy contained in a list of named tuples. Proxies are checked by using the 'check_proxy' function. 2019-01-20 18:07:32 +00:00			`from colorama import Fore, Style`
Created new file 'load_proxies.py' to store functions for reading proxies from files, and checking proxy anonimity. Created the function 'load_proxies_from_csv' which reads proxies from a .csv file to a list of named tuples. 2019-01-20 17:59:33 +00:00
			`"""`
			`A function which loads proxies from a .csv file, to a list.`

			`Inputs: path to .csv file which contains proxies, described by fields: 'ip', 'port', 'protocol'.`

			`Outputs: list containing proxies stored in named tuples.`
			`"""`


			`def load_proxies_from_csv(path_to_list):`
			`Proxy = namedtuple('Proxy', ['ip', 'port', 'protocol'])`

			`with open(path_to_list, 'r') as csv_file:`
			`csv_reader = csv.DictReader(csv_file)`
			`proxies = [Proxy(line['ip'],line['port'],line['protocol']) for line in csv_reader]`

Function 'check_proxy', which checks anonimity of a signle proxy by anaylizing return headers received from a request using the proxy in question. 2019-01-20 18:02:57 +00:00			`return proxies`


			`"""`
			`A function which test the proxy by attempting`
			`to make a request to the designated website.`

			`We use 'wikipedia.org' as a test, since we can test the proxy anonymity`
			`by check if the returning 'X-Client-IP' header matches the proxy ip.`
			`"""`


			`def check_proxy(proxy_ip, proxy_port, protocol):`
			`full_proxy = f'{protocol}://{proxy_ip}:{proxy_port}'`
			`proxies = {'http': full_proxy, 'https': full_proxy}`
			`try:`
			`r = requests.get('https://www.wikipedia.org',proxies=proxies, timeout=4)`
			`return_proxy = r.headers['X-Client-IP']`
			`if proxy_ip==return_proxy:`
			`return True`
			`else:`
			`return False`
			`except Exception:`
Function 'check_proxy_list' which checks anonimity of each proxy contained in a list of named tuples. Proxies are checked by using the 'check_proxy' function. 2019-01-20 18:07:32 +00:00			`return False`


			`"""`
			`A function which takes in one mandatory argument -> a proxy list in`
			`the format returned by the function 'load_proxies_from_csv'.`

			`It also takes an optional argument 'max_proxies', if the user wishes to`
			`cap the number of validated proxies.`

			`Each proxy is tested by the check_proxy function. Since each test is done on`
			`'wikipedia.org', in order to be considerate to Wikipedia servers, we are not using any async modules,`
			`but are sending successive requests each separated by at least 1 sec.`

			`Outputs: list containing proxies stored in named tuples.`
			`"""`


			`from colorama import Fore, Style`

			`def check_proxy_list(proxy_list, max_proxies=None):`
			`print((Style.BRIGHT + Fore.GREEN + "[" +`
			`Fore.YELLOW + "*" +`
			`Fore.GREEN + "] Started checking proxies."))`
			`working_proxies = []`

			`# If the user has limited the number of proxies we need,`
			`# the function will stop when the working_proxies`
			`# loads the max number of requested proxies.`
			`if max_proxies != None:`
			`for proxy in proxy_list:`
			`if len(working_proxies) < max_proxies:`
			`time.sleep(1)`
			`if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True:`
			`working_proxies.append(proxy)`
			`else:`
			`break`
			`else:`
			`for proxy in proxy_list:`
			`time.sleep(1)`
			`if check_proxy(proxy.ip,proxy.port,proxy.protocol) == True:`
			`working_proxies.append(proxy)`

			`if len(working_proxies) > 0:`
			`print((Style.BRIGHT + Fore.GREEN + "[" +`
			`Fore.YELLOW + "*" +`
			`Fore.GREEN + "] Finished checking proxies."))`
			`return working_proxies`

			`else:`
			`raise Exception("Found no working proxies.")`