2018-10-07 10:11:59 +00:00
|
|
|
"""
|
|
|
|
All location related functions and converters.
|
2018-10-07 10:33:43 +00:00
|
|
|
|
|
|
|
The main entry point is `location_processing`
|
|
|
|
which gets `location` and `source_ip_address`
|
|
|
|
and, based on this information, generates
|
|
|
|
a precise location description.
|
|
|
|
|
2018-10-07 10:11:59 +00:00
|
|
|
"""
|
2018-12-02 08:41:29 +00:00
|
|
|
from __future__ import print_function
|
2018-10-07 10:11:59 +00:00
|
|
|
|
2020-04-03 18:58:51 +00:00
|
|
|
import sys
|
2018-10-07 10:33:43 +00:00
|
|
|
import os
|
2018-10-07 10:11:59 +00:00
|
|
|
import json
|
2018-10-07 10:33:43 +00:00
|
|
|
import socket
|
2018-10-07 10:11:59 +00:00
|
|
|
import requests
|
|
|
|
import geoip2.database
|
2020-11-15 16:53:59 +00:00
|
|
|
import pycountry
|
2018-10-07 10:11:59 +00:00
|
|
|
|
2018-10-07 10:33:43 +00:00
|
|
|
from globals import GEOLITE, GEOLOCATOR_SERVICE, IP2LCACHE, IP2LOCATION_KEY, NOT_FOUND_LOCATION, \
|
2020-07-19 18:57:18 +00:00
|
|
|
ALIASES, BLACKLIST, IATA_CODES_FILE, IPLOCATION_ORDER, IPINFO_TOKEN
|
2018-10-07 10:33:43 +00:00
|
|
|
|
2018-10-07 10:11:59 +00:00
|
|
|
# Module-wide GeoIP2 database reader, opened once from the GEOLITE path and reused by geoip().
GEOIP_READER = geoip2.database.Reader(GEOLITE)
|
|
|
|
|
2020-11-15 16:54:51 +00:00
|
|
|
# Country-name overrides applied by workaround(): reported name -> name to use instead.
COUNTRY_MAP = {"Russian Federation": "Russia"}
|
|
|
|
|
2020-10-29 06:14:08 +00:00
|
|
|
def _debug_log(s):
|
|
|
|
with open("/tmp/debug.log", "a") as f:
|
|
|
|
f.write(s+"\n")
|
|
|
|
|
2018-10-07 10:11:59 +00:00
|
|
|
def ascii_only(string):
    """
    Check if `string` contains only ASCII symbols.

    Accepts both text and byte strings. Returns True when every
    character/byte is in the ASCII range (an empty string counts
    as ASCII), False otherwise.
    """
    try:
        if isinstance(string, bytes):
            # byte strings: every byte must be decodable as ASCII
            string.decode('ascii')
        else:
            # text strings: every character must be encodable as ASCII
            string.encode('ascii')
    except (UnicodeEncodeError, UnicodeDecodeError):
        return False
    return True
|
|
|
|
|
2018-10-07 10:33:43 +00:00
|
|
|
def is_ip(ip_addr):
    """
    Check if `ip_addr` looks like an IP Address

    Returns True if `ip_addr` parses as an IPv4 or an IPv6 address,
    False otherwise (including for values that are not strings).
    """
    if sys.version_info[0] < 3:
        ip_addr = ip_addr.encode("utf-8")

    for family in (socket.AF_INET, socket.AF_INET6):
        try:
            socket.inet_pton(family, ip_addr)
        except (socket.error, ValueError, TypeError):
            # socket.error: not a valid address of this family;
            # ValueError: embedded NUL character in the string;
            # TypeError: argument is not a string at all
            continue
        return True
    return False
|
2018-10-07 10:33:43 +00:00
|
|
|
|
|
|
|
def location_normalize(location):
    """
    Normalize location name `location`

    The name is lowercased, `_` and `+` are turned into spaces and
    surrounding whitespace is stripped. Unless the result starts with
    `moon@`, the special characters ``!@#$*;:\\`` are removed as well.
    """
    normalized = location.lower()
    for separator in ('_', '+'):
        normalized = normalized.replace(separator, ' ')
    normalized = normalized.strip()

    # moon queries keep their special characters (the `@` is significant)
    if normalized.startswith('moon@'):
        return normalized

    banned = r'!@#$*;:\\'
    return ''.join(char for char in normalized if char not in banned)
|
|
|
|
|
|
|
|
|
2018-10-07 10:11:59 +00:00
|
|
|
|
|
|
|
def geolocator(location):
    """
    Return a GPS pair for specified `location` or None
    if nothing can be found

    Queries GEOLOCATOR_SERVICE with `location` appended to the URL path
    and returns the decoded JSON answer. Callers in this module read the
    keys 'latitude', 'longitude' and 'address' from it.
    Returns None when the request fails, the answer is empty,
    or the answer is not valid JSON; errors are printed, not raised.
    """
    try:
        geo = requests.get('%s/%s' % (GEOLOCATOR_SERVICE, location)).text
    except requests.exceptions.RequestException as exception:
        # RequestException covers ConnectionError as well as timeouts,
        # redirect loops etc. -- same policy as the IP locators below
        print("ERROR: %s" % exception)
        return None

    if not geo:
        return None

    try:
        return json.loads(geo)
    except ValueError as exception:
        print("ERROR: %s" % exception)
        return None
|
|
|
|
|
|
|
|
|
2020-07-19 18:57:18 +00:00
|
|
|
def ipcachewrite(ip_addr, location):
    """ Write a retrieved ip+location into cache

    `location` is a sequence ordered as
    (CITY, REGION, COUNTRY, CCODE, LAT, LONG); it is stored in the file
    IP2LCACHE/<ip_addr> as `ccode;country;region;city;lat;long`
    (like ip2location format).

    Can stress some filesystems after long term use, see
    https://stackoverflow.com/questions/466521/how-many-files-can-i-put-in-a-directory
    """
    cachefile = os.path.join(IP2LCACHE, ip_addr)
    # exist_ok avoids the check-then-create race between concurrent requests
    os.makedirs(IP2LCACHE, exist_ok=True)

    city, region, country, ccode, lat, lng = location[:6]
    # lat/long may be floats (geoip) or strings (ip2location, ipinfo),
    # so every field is converted explicitly before joining
    fields = (ccode, country, region, city, lat, lng)
    with open(cachefile, 'w') as cache:
        cache.write(';'.join(str(field) for field in fields))
|
2020-07-19 18:57:18 +00:00
|
|
|
|
|
|
|
def ipcache(ip_addr):
    """ Retrieve a location from cache by ip addr
    Returns a triple of (CITY, REGION, COUNTRY) or None
    (None if there is no cache entry or the entry is malformed).

    The cache directory IP2LCACHE is created if it does not exist yet.

    TODO: When cache becomes more robust, transition to using latlong
    """
    cachefile = os.path.join(IP2LCACHE, ip_addr)
    os.makedirs(IP2LCACHE, exist_ok=True)

    if not os.path.exists(cachefile):
        return None

    try:
        # the context manager closes the file even if parsing fails
        with open(cachefile, 'r') as cache:
            _, country, region, city, *_ = cache.read().split(';')
    except ValueError:
        # cache entry is malformed: should be
        # [ccode];country;region;city;[lat];[long];...
        return None
    return city, region, country
|
2020-09-20 09:09:04 +00:00
|
|
|
|
2020-07-19 18:57:18 +00:00
|
|
|
|
|
|
|
def ip2location(ip_addr):
    """Convert IP address `ip_addr` to a location name

    Queries the ip2location.com web service (package WS5).
    Returns (CITY, REGION, COUNTRY, CCODE, LAT, LONG), all strings,
    or None if the key is not configured, the request fails,
    or the answer cannot be parsed.
    """
    # if IP2LOCATION_KEY is not set, do not query,
    # because the query wont be processed anyway
    if not IP2LOCATION_KEY:
        return None

    try:
        r = requests.get(
            'http://api.ip2location.com/?ip=%s&key=%s&package=WS5'
            % (ip_addr, IP2LOCATION_KEY))
        r.raise_for_status()
    except requests.exceptions.RequestException:
        return None

    location = r.text
    # an answer without `;` separators does not carry a location
    if not location or ';' not in location:
        return None

    try:
        ccode, country, region, city, lat, lng, *_ = location.split(';')
    except ValueError:
        # fewer than six fields: not a usable answer
        return None
    return city, region, country, ccode, lat, lng
|
2018-10-07 10:11:59 +00:00
|
|
|
|
|
|
|
|
2020-07-19 18:57:18 +00:00
|
|
|
def ipinfo(ip_addr):
    """Convert IP address `ip_addr` to a location using ipinfo.io

    Returns (CITY, REGION, COUNTRY, CCODE, LAT, LONG), all strings,
    or None if the token is not configured, the request fails,
    the answer is incomplete, or the country code is unknown.
    """
    if not IPINFO_TOKEN:
        return None

    try:
        r = requests.get(
            'https://ipinfo.io/%s/json?token=%s'
            % (ip_addr, IPINFO_TOKEN))
        r.raise_for_status()
        r_json = r.json()
        city, region, ccode = r_json["city"], r_json["region"], r_json["country"]
        lat, lng = r_json["loc"].split(',')
        # NOTE: ipinfo only provides ISO codes for countries
        country_record = pycountry.countries.get(alpha_2=ccode)
    except (requests.exceptions.RequestException, ValueError, KeyError):
        # ValueError: failure to parse json in response or malformed "loc";
        # KeyError: an expected field is missing from the answer
        return None

    if country_record is None:
        # country code is not known to pycountry
        return None
    return city, region, country_record.name, ccode, lat, lng
|
2020-07-19 18:57:18 +00:00
|
|
|
|
|
|
|
|
|
|
|
def geoip(ip_addr):
    """Convert IP address `ip_addr` to a location using the local GeoIP database

    Returns (CITY, REGION, COUNTRY, CCODE, LAT, LONG) as read from
    GEOIP_READER, or None if the address is not in the database
    or is not a valid IP address. Individual fields may be None
    when the database has no data for them.
    """
    try:
        response = GEOIP_READER.city(ip_addr)
    except (geoip2.errors.AddressNotFoundError, ValueError):
        # ValueError: `ip_addr` is not a parseable IP address
        return None

    return (
        response.city.name,
        # `subdivisions` is a tuple-like collection; the region name
        # is taken from its most specific entry
        response.subdivisions.most_specific.name,
        response.country.name,
        response.country.iso_code,
        response.location.latitude,
        response.location.longitude,
    )
|
|
|
|
|
2018-10-07 10:11:59 +00:00
|
|
|
|
2020-11-15 16:54:51 +00:00
|
|
|
def workaround(country):
    """Return the replacement name for `country` from COUNTRY_MAP, or `country` itself."""
    # workaround for strange bug with the country name
    # maybe some other countries have this problem too
    replacement = COUNTRY_MAP.get(country)
    if replacement:
        return replacement
    return country
|
2020-07-19 18:57:18 +00:00
|
|
|
|
|
|
|
def get_location(ip_addr):
    """
    Return location triple (CITY, REGION, COUNTRY) for `ip_addr`

    The cache is consulted first; on a miss the locators listed in
    IPLOCATION_ORDER are tried in order until one of them answers.
    A complete answer is written to the cache. If nothing is found,
    (NOT_FOUND_LOCATION, None, None) is returned.
    """
    location = ipcache(ip_addr)
    if location:
        # NOTE(review): cached entries are returned without workaround()
        return location

    # location from iplocators have the following order:
    # (CITY, REGION, COUNTRY, CCODE, LAT, LONG)
    locators = {
        'geoip': geoip,
        'ip2location': ip2location,
        'ipinfo': ipinfo,
    }
    for method in IPLOCATION_ORDER:
        locator = locators.get(method)
        if locator is None:
            print("ERROR: invalid iplocation method specified: %s" % method)
            continue
        location = locator(ip_addr)
        if location is not None:
            break

    if location is not None and all(location):
        ipcachewrite(ip_addr, location)
        # cache write used to happen before workaround, preserve that
        # ccode is cached but not needed for location
        city, region, country = location[:3]
        # locators return tuples, so build a new triple instead of
        # assigning into `location`
        return city, region, workaround(country)

    # A reverse-geocoding fallback for answers without a city used to live
    # here; it is disabled because of the geoip services capacity.

    # No methods resulted in a location - return default
    return NOT_FOUND_LOCATION, None, None
|
2018-10-07 10:11:59 +00:00
|
|
|
|
|
|
|
|
|
|
|
def location_canonical_name(location):
    """
    Find canonical name for `location`

    The name is normalized first; if the normalized name has an entry
    in LOCATION_ALIAS, the alias target is returned, otherwise the
    normalized name itself.
    """
    normalized = location_normalize(location)
    return LOCATION_ALIAS.get(normalized.lower(), normalized)
|
|
|
|
|
|
|
|
def load_aliases(aliases_filename):
    """
    Load aliases from the aliases file

    Each line has the form `alias : canonical name`; both sides are
    normalized with `location_normalize`. Lines without a `:` separator
    (for example blank lines) are skipped.
    Returns a dict mapping normalized alias -> normalized canonical name.
    """
    aliases_db = {}
    with open(aliases_filename, 'r') as f_aliases:
        for line in f_aliases:
            # byte strings (Python 2 file objects) are decoded first
            if hasattr(line, 'decode'):
                line = line.decode('utf-8')
            if ':' not in line:
                continue
            from_, to_ = line.split(':', 1)
            aliases_db[location_normalize(from_)] = location_normalize(to_)
    return aliases_db
|
|
|
|
|
|
|
|
def load_iata_codes(iata_codes_filename):
    """
    Load IATA codes from the IATA codes file

    Returns the set of lines of the file, each stripped of
    surrounding whitespace.
    """
    with open(iata_codes_filename, 'r') as f_iata_codes:
        return {code_line.strip() for code_line in f_iata_codes}
|
|
|
|
|
|
|
|
# Lookup tables, loaded once at import time from the configured files.
LOCATION_ALIAS = load_aliases(ALIASES)
# the blacklist is read inside a context manager so the file gets closed
with open(BLACKLIST, 'r') as _blacklist_file:
    LOCATION_BLACK_LIST = [x.strip() for x in _blacklist_file]
IATA_CODES = load_iata_codes(IATA_CODES_FILE)
|
|
|
|
|
|
|
|
def is_location_blocked(location):
    """
    Return True if this location is blocked
    or False if it is allowed

    A location is blocked when its lowercased name is listed in
    LOCATION_BLACK_LIST; `None` is never blocked.
    """
    if location is None:
        return False
    return location.lower() in LOCATION_BLACK_LIST
|
|
|
|
|
|
|
|
|
2020-09-20 09:09:57 +00:00
|
|
|
def get_hemisphere(location):
    """
    Return hemisphere of the location (True = North, False = South).
    Assume North and return True if location can't be found.

    `location` is a sequence of name parts, e.g. (CITY, REGION, COUNTRY);
    if it is empty or any part is missing, no lookup is made.
    """
    # an incomplete location (e.g. the "not found" triple) cannot be
    # geolocated, so fall back to the documented default
    if not location or not all(location):
        return True

    geolocation = geolocator(", ".join(location))
    if geolocation is None:
        return True

    try:
        return geolocation["latitude"] > 0
    except (KeyError, TypeError):
        # answer without a usable latitude: treat as "can't be found"
        return True
|
|
|
|
|
2018-10-07 10:34:36 +00:00
|
|
|
def location_processing(location, ip_addr):
    """
    Resolve the requested `location` for a query coming from `ip_addr`.

    `location` may be None/empty or 'MyLocation' (use the client's IP),
    '~' (detect automatically), an IP address, '@hostname',
    '~search term', or a plain location name.

    Returns a tuple of six values:
    (location, override_location_name, full_address, country,
     query_source_location, hemisphere)
    where `location` is either "LAT,LONG" or NOT_FOUND_LOCATION whenever
    the geolocator was consulted, `query_source_location` is the
    (CITY, REGION, COUNTRY) triple of `ip_addr`, and `hemisphere`
    is only computed for moon queries (True = North).
    """

    # if location is starting with ~
    # or has non ascii symbols
    # it should be handled like a search term (for geolocator)
    override_location_name = None
    full_address = None
    hide_full_address = False
    force_show_full_address = location is not None and location.startswith('~')

    # location ~ means that it should be detected automatically,
    # and shown in the location line below the report
    if location == '~':
        location = None

    # '@hostname': resolve the host and locate its IP address
    if location and location.lstrip('~ ').startswith('@'):
        try:
            location, region, country = get_location(
                socket.gethostbyname(
                    location.lstrip('~ ')[1:]))
            location = '~' + location
            if region and country:
                location += ", %s, %s" % (region, country)
            hide_full_address = not force_show_full_address
        except Exception:
            # deliberate best effort: any resolution/lookup failure means
            # "not found"; KeyboardInterrupt/SystemExit are not swallowed
            location = NOT_FOUND_LOCATION

    query_source_location = get_location(ip_addr)

    # For moon queries, hemisphere must be found
    # True for North, False for South
    hemisphere = False
    if location is not None and (location.lower()+"@").startswith("moon@"):
        hemisphere = get_hemisphere(query_source_location)

    country = None
    if not location or location == 'MyLocation':
        location = ip_addr

    if is_ip(location):
        location, region, country = get_location(location)
        # location is just city here

        if location:
            location = '~' + location
            if region and country:
                location += ", %s, %s" % (region, country)
            hide_full_address = not force_show_full_address

    if location and not location.startswith('~'):
        tmp_location = location_canonical_name(location)
        if tmp_location != location:
            override_location_name = location
            location = tmp_location

    # up to this point it is possible that the name
    # contains some unicode symbols
    # here we resolve them
    if location is not None:
        # every location is turned into a search term for the geolocator
        location = "~" + location.lstrip('~ ')
        if not override_location_name:
            override_location_name = location.lstrip('~')

    # names starting with "~-," are passed through without geolocation
    if location is not None and not location.startswith("~-,") and location.startswith('~'):
        geolocation = geolocator(location_canonical_name(location[1:]))
        if geolocation is not None:
            if not override_location_name:
                override_location_name = location[1:].replace('+', ' ')
            location = "%s,%s" % (geolocation['latitude'], geolocation['longitude'])
            country = None
            if not hide_full_address:
                full_address = geolocation['address']
            else:
                full_address = None
        else:
            location = NOT_FOUND_LOCATION

    return location, \
            override_location_name, \
            full_address, \
            country, \
            query_source_location, \
            hemisphere
|