2018-10-23 18:04:25 +00:00
|
|
|
|
#vim: fileencoding=utf-8
|
2018-10-02 21:56:11 +00:00
|
|
|
|
|
2018-10-23 18:04:25 +00:00
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
The proxy server acts as a backend for the wttr.in service.
|
|
|
|
|
|
|
|
|
|
It caches the answers and handles various data sources transforming their
|
|
|
|
|
answers into the format supported by the wttr.in service.
|
|
|
|
|
|
2020-04-26 17:36:47 +00:00
|
|
|
|
If WTTRIN_TEST is specified, it works in a special test mode:
|
|
|
|
|
it does not fetch and does not store the data in the cache,
|
|
|
|
|
but uses the fake data from "test/proxy-data".
|
|
|
|
|
|
2018-10-23 18:04:25 +00:00
|
|
|
|
"""
|
2018-12-02 08:41:29 +00:00
|
|
|
|
from __future__ import print_function
|
2018-10-23 18:04:25 +00:00
|
|
|
|
|
|
|
|
|
from gevent.pywsgi import WSGIServer
|
2018-10-02 21:56:11 +00:00
|
|
|
|
from gevent.monkey import patch_all
|
2020-05-10 15:06:39 +00:00
|
|
|
|
patch_all()
|
2018-10-02 21:56:11 +00:00
|
|
|
|
|
2018-10-23 18:04:25 +00:00
|
|
|
|
# pylint: disable=wrong-import-position,wrong-import-order
|
|
|
|
|
import sys
|
2018-10-02 21:56:11 +00:00
|
|
|
|
import os
|
|
|
|
|
import time
|
|
|
|
|
import json
|
2020-05-01 14:07:23 +00:00
|
|
|
|
import hashlib
|
2018-10-02 21:56:11 +00:00
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
import cyrtranslit
|
|
|
|
|
|
2018-10-23 18:04:25 +00:00
|
|
|
|
from flask import Flask, request
|
|
|
|
|
# Flask application object; the `proxy` request handler is registered on it.
APP = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Project root: the parent of the directory that contains this script.
# NOTE: the previous code passed the *string literal* '__file__' to
# os.path.dirname(), which always produced '' and therefore resolved to the
# current working directory instead of the location of this file.
MYDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Make the project-local modules under <root>/lib/ importable.
sys.path.append("%s/lib/" % MYDIR)
|
|
|
|
|
|
2022-10-25 17:18:07 +00:00
|
|
|
|
import proxy_log
|
|
|
|
|
from globals import PROXY_CACHEDIR, PROXY_HOST, PROXY_PORT, USE_METNO, USER_AGENT, MISSING_TRANSLATION_LOG, PROXY_LOG_FILE
|
2020-10-08 21:09:58 +00:00
|
|
|
|
from metno import create_standard_json_from_metno, metno_request
|
2018-10-23 18:04:25 +00:00
|
|
|
|
from translations import PROXY_LANGS
|
|
|
|
|
# pylint: enable=wrong-import-position
|
|
|
|
|
|
2022-10-25 17:18:07 +00:00
|
|
|
|
# Logger used to record every upstream query together with its error message.
proxy_logger = proxy_log.LoggerWWO(PROXY_LOG_FILE)
|
|
|
|
|
|
2020-04-26 17:36:47 +00:00
|
|
|
|
def is_testmode():
    """Tell whether the server is running in the wttr.in test mode.

    The test mode is switched on by the mere presence of the
    ``WTTRIN_TEST`` environment variable; its value is not inspected.
    """
    test_flag = os.environ.get("WTTRIN_TEST")
    return test_flag is not None
|
2018-10-02 21:56:11 +00:00
|
|
|
|
|
|
|
|
|
def load_translations(langs=None, translations_dir='share/translations'):
    """
    Load the translation tables of all requested languages.

    Each language is read from ``<translations_dir>/<lang>.txt``. Every
    useful line of such a file consists of colon-separated fields: the
    first field is ignored, the second one is the translation and the
    third one is the original text. Any further fields are ignored.
    Lines with fewer than three fields (comments, blank lines, broken
    entries) are skipped instead of aborting the whole load.

    :param langs: iterable of language names; defaults to `PROXY_LANGS`
    :param translations_dir: directory holding the ``<lang>.txt`` files
    :return: dict ``{lang: {original text (lowercased): translation}}``
    :raises IOError: if a translation file cannot be opened
    """
    if langs is None:
        langs = PROXY_LANGS

    translations = {}
    for name in langs:
        f_name = '%s/%s.txt' % (translations_dir, name)
        # The language key is the file name up to its first dot.
        lang = f_name.split('/')[-1].split('.', 1)[0]

        translation = {}
        # The files contain non-ASCII text, so do not rely on the locale
        # default encoding.
        with open(f_name, "r", encoding="utf-8") as f_file:
            for line in f_file:
                fields = line.strip().split(':')
                if len(fields) < 3:
                    continue
                trans = fields[1].strip()
                orig = fields[2].strip()
                # Lookups are case-insensitive: keys are stored lowercased.
                translation[orig.lower()] = trans
        translations[lang] = translation
    return translations
|
|
|
|
|
# Translation tables, loaded once at import time:
# {language: {lowercased original text: translation}}.
TRANSLATIONS = load_translations()
|
|
|
|
|
|
2020-10-08 21:09:58 +00:00
|
|
|
|
def _is_metno():
    """Return the `USE_METNO` setting (met.no is used as the data source)."""
    use_metno = USE_METNO
    return use_metno
|
|
|
|
|
|
2018-10-23 18:04:25 +00:00
|
|
|
|
def _find_srv_for_query(path, query):   # pylint: disable=unused-argument
    """Return the base URL of the upstream weather service.

    The choice depends only on `_is_metno()`; `path` and `query` are
    accepted but not used.
    """
    return 'https://api.met.no' if _is_metno() else 'http://api.worldweatheronline.com'
|
2018-10-02 21:56:11 +00:00
|
|
|
|
|
2020-05-01 14:07:23 +00:00
|
|
|
|
def _cache_file(path, query):
    """Return cache file name for specified `path` and `query`
    and for the current time.

    To smooth the load on the server, the expiration time is slightly
    varied based on the first hex digit of the path+query sha1 digest:
    the interval is between 228 and 250 minutes.

    The file lives in ``PROXY_CACHEDIR/<window start>/<path>/<query>``.
    `path` and `query` come from the client, so if they would move the
    file outside of the time-window directory (absolute component or
    ``..`` segments), a name derived from the digest is used instead.
    """

    digest = hashlib.sha1(("%s %s" % (path, query)).encode("utf-8")).hexdigest()
    digest_number = ord(digest[0].upper())
    expiry_interval = 60*(digest_number+180)

    # Start of the current expiry window, zero-padded to 10 digits.
    timestamp = "%010d" % (int(time.time())//expiry_interval*expiry_interval)
    cache_root = os.path.join(PROXY_CACHEDIR, timestamp)
    filename = os.path.join(cache_root, path, query)

    # Refuse names that escape the cache directory (path traversal).
    root_prefix = os.path.abspath(cache_root) + os.sep
    if not os.path.abspath(filename).startswith(root_prefix):
        filename = os.path.join(cache_root, "_unsafe", digest)

    return filename
|
|
|
|
|
|
|
|
|
|
|
2018-10-23 18:04:25 +00:00
|
|
|
|
def _load_content_and_headers(path, query):
    """Load a cached answer and its headers for `path` and `query`.

    In the test mode the fixed file "test/proxy-data/data1" is used
    instead of the real cache file.

    :return: tuple ``(content, headers)``; ``(None, None)`` if the content
        file or its ".headers" companion is missing or unreadable
    """
    if is_testmode():
        cache_file = "test/proxy-data/data1"
    else:
        cache_file = _cache_file(path, query)

    try:
        # Context managers close the files even if reading fails.
        with open(cache_file, 'r', encoding="utf-8") as f_content:
            content = f_content.read()
        with open(cache_file + ".headers", 'r', encoding="utf-8") as f_headers:
            headers = json.loads(f_headers.read())
    except (IOError, ValueError):
        # ValueError: undecodable content or broken headers JSON;
        # treat it like a cache miss so that the data is fetched again.
        return None, None
    return content, headers
|
|
|
|
|
|
2020-05-01 14:07:23 +00:00
|
|
|
|
def _touch_empty_file(path, query):
    """Create (or truncate to) an empty cache file for `path` and `query`.

    Missing parent directories are created.
    """
    cache_file = _cache_file(path, query)
    # exist_ok avoids the race between the existence check and the
    # creation when several requests populate the same directory.
    os.makedirs(os.path.dirname(cache_file), exist_ok=True)
    with open(cache_file, 'w'):
        pass
|
|
|
|
|
|
2018-10-23 18:04:25 +00:00
|
|
|
|
def _save_content_and_headers(path, query, content, headers):
    """Store `content` and `headers` in the cache for `path` and `query`.

    :param content: raw answer body (bytes), written to the cache file
    :param headers: JSON-serializable dict, written next to the cache
        file with the ".headers" suffix
    """
    cache_file = _cache_file(path, query)
    # exist_ok avoids the race between the existence check and the
    # creation when several requests populate the same directory.
    os.makedirs(os.path.dirname(cache_file), exist_ok=True)
    with open(cache_file + ".headers", 'w') as f_headers:
        f_headers.write(json.dumps(headers))
    with open(cache_file, 'wb') as f_content:
        f_content.write(content)
|
2018-10-02 21:56:11 +00:00
|
|
|
|
|
|
|
|
|
def translate(text, lang):
    """
    Return `text` translated into `lang`.

    A comma-separated `text` is split and every term is translated on its
    own; the results are joined back with ", ".
    When `lang` is unknown or has no translation for `text`, the text is
    returned unchanged and the miss is appended to the
    `MISSING_TRANSLATION_LOG` file of that language.
    """

    def _report_missing(language, message):
        with open(MISSING_TRANSLATION_LOG % language, "a") as log_file:
            log_file.write(message+"\n")

    if "," in text:
        return ", ".join(
            translate(term.strip(), lang) for term in text.split(","))

    if lang not in TRANSLATIONS:
        _report_missing(lang, "UNKNOWN_LANGUAGE")
        return text

    dictionary = TRANSLATIONS[lang]
    key = text.lower()  # the tables are keyed by lowercased text
    if key not in dictionary:
        _report_missing(lang, text)
        return text

    return dictionary[key]
|
|
|
|
|
|
|
|
|
|
def cyr(to_translate):
    """
    Return `to_translate` transliterated into cyrillic
    (delegates to `cyrtranslit.to_cyrillic`)
    """
    cyrillic_text = cyrtranslit.to_cyrillic(to_translate)
    return cyrillic_text
|
|
|
|
|
|
2018-10-23 18:04:25 +00:00
|
|
|
|
def _patch_greek(original):
    """Replace every "Ηλιόλουστη/ο" in `original` with "Ηλιόλουστη"."""
    with_suffix, without_suffix = "Ηλιόλουστη/ο", "Ηλιόλουστη"
    return without_suffix.join(original.split(with_suffix))
|
2018-10-02 21:56:11 +00:00
|
|
|
|
|
|
|
|
|
def _localized_value(entry, description, lang, known_languages):
    """Return the new `lang_<lang>` value of `entry`, or None if `lang`
    needs no handling.

    `entry` is a current-condition or hourly dict and `description` is
    its English weather description.
    """
    lang_key = 'lang_%s' % lang
    if lang in known_languages:
        return [{'value': translate(description, lang)}]
    if lang == 'sr':
        return [{'value': cyr(entry[lang_key][0]['value'])}]
    if lang == 'el':
        return [{'value': _patch_greek(entry[lang_key][0]['value'])}]
    if lang == 'sr-lat':
        # The upstream answer was requested for 'sr'; reuse its text.
        return [{'value': entry['lang_sr'][0]['value']}]
    return None


def add_translations(content, lang):
    """
    Add `lang` translation to `content` (JSON)
    returned by the data source

    The description of the current condition is capitalized, and the
    `lang_<lang>` entry of the current condition and of every hourly
    forecast is filled in or patched for `lang`.

    :param content: JSON document (str or bytes) from the data source
    :param lang: language code requested by the client
    :return: the updated JSON as str; `content` unchanged if the document
        does not have the expected structure (e.g. an upstream error
        answer); an empty dict if `content` is "{}" or is not valid JSON
    """

    if content == "{}":
        return {}

    try:
        d = json.loads(content)     # pylint: disable=invalid-name
    except (ValueError, TypeError) as exception:
        print("---")
        print(exception)
        print("---")
        return {}

    languages_to_translate = TRANSLATIONS.keys()
    lang_key = 'lang_%s' % lang

    try:
        current = d['data']['current_condition'][0]
        weather_condition = current['weatherDesc'][0]['value'].capitalize()
        current['weatherDesc'][0]['value'] = weather_condition

        localized = _localized_value(
            current, weather_condition, lang, languages_to_translate)
        if localized is not None:
            current[lang_key] = localized

        for day in d['data']['weather']:
            for hour in day['hourly']:
                localized = _localized_value(
                    hour, hour['weatherDesc'][0]['value'],
                    lang, languages_to_translate)
                if localized is not None:
                    hour[lang_key] = localized

        content = json.dumps(d)
    except (IndexError, KeyError, TypeError, AttributeError, ValueError) as exception:
        # Unexpected document structure (for instance an error answer
        # without 'current_condition'): pass the original content through
        # instead of failing the whole request.
        print(exception)
    return content
|
|
|
|
|
|
2020-10-08 21:09:58 +00:00
|
|
|
|
def _fetch_content_and_headers(path, query_string, **kwargs):
    """Return the answer for `path` and `query_string`, from the cache if
    possible, otherwise from the upstream service.

    On a cache miss the upstream service is queried (up to 10 attempts,
    2 seconds timeout each). The outcome is logged with `proxy_logger`
    and a successful answer is stored in the cache.

    :param kwargs: passed through to `requests.get` (e.g. ``headers``)
    :return: tuple ``(content, headers)``; `content` is "{}" when no
        usable answer could be fetched
    """
    content, headers = _load_content_and_headers(path, query_string)
    if content is not None:
        print("cache found")
        return content, headers

    srv = _find_srv_for_query(path, query_string)
    url = "%s/%s?%s" % (srv, path, query_string)

    attempts = 10
    response = None
    error = ""
    while attempts:
        try:
            response = requests.get(url, timeout=2, **kwargs)
        except (requests.Timeout, requests.ConnectionError) as exception:
            # Retry on any timeout or connection problem, not only on
            # read timeouts, and remember why the attempt failed.
            attempts -= 1
            error = "request failed: %s" % type(exception).__name__
            continue

        try:
            data = json.loads(response.content)
        except ValueError:
            attempts -= 1
            error = "invalid response"
            continue

        # The error, if any, is expected as data.error[0].msg
        raw_error = ""
        if isinstance(data, dict) and isinstance(data.get("data"), dict):
            raw_error = data["data"].get("error", "")
        if raw_error:
            try:
                error = raw_error[0]["msg"]
            except (IndexError, KeyError, TypeError):
                error = "invalid error format: %s" % (raw_error,)
        else:
            # A successful answer must be logged without an error.
            error = ""
        break

    proxy_logger.log(query_string, error)
    _touch_empty_file(path, query_string)

    # NOTE: the truth value of a requests response is its `ok` flag,
    # so answers with a 4xx/5xx status are not cached and yield "{}".
    if response:
        headers = {'Content-Type': response.headers['content-type']}
        _save_content_and_headers(path, query_string, response.content, headers)
        content = response.content
    else:
        content = "{}"
    return content, headers
|
|
|
|
|
|
|
|
|
|
|
2022-10-25 17:20:49 +00:00
|
|
|
|
def _make_query(path, query_string):
    """Query the configured data source and return ``(content, headers)``.

    For met.no the request is converted with `metno_request` and the
    answer is converted back with `create_standard_json_from_metno`;
    a non-empty `USER_AGENT` is required (ValueError otherwise).
    For WWO two extra parameters are appended to the query string.
    """
    if not _is_metno():
        # WWO tweaks
        wwo_query = query_string + "&extra=localObsTime" + "&includelocation=yes"
        content, headers = _fetch_content_and_headers(path, wwo_query)
        return content, headers

    path, query, days = metno_request(path, query_string)
    if USER_AGENT == '':
        raise ValueError('User agent must be set to adhere to metno ToS: https://api.met.no/doc/TermsOfService')
    metno_content, headers = _fetch_content_and_headers(
        path, query, headers={'User-Agent': USER_AGENT})
    content = create_standard_json_from_metno(metno_content, days)
    return content, headers
|
|
|
|
|
|
|
|
|
|
@APP.route("/<path:path>")
def proxy(path):
    """
    Main proxy function. Handles incoming HTTP queries.

    The query is passed to the data source ('sr-lat' is requested as
    'sr' and 'lang=None' as 'lang=en'); the answer is then extended with
    the translation for the requested `lang` (default 'en') and returned
    with the status 200.
    """
    lang = request.args.get('lang', 'en')

    raw_query = request.query_string.decode("utf-8")
    upstream_query = raw_query.replace('sr-lat', 'sr').replace('lang=None', 'lang=en')

    body, headers = _make_query(path, upstream_query)
    body = add_translations(body, lang)

    return body, 200, headers
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # With a command line argument: process that single request with the
    # Flask test client and print the result. Without arguments: serve
    # forever on PROXY_PORT on all interfaces.
    if len(sys.argv) != 1:
        print('running single request from command line arg')
        APP.testing = True
        with APP.test_client() as client:
            resp = client.get(sys.argv[1])
            print('Status: ' + resp.status)
            print(resp.data.decode('utf-8'))
    else:
        bind_addr = "0.0.0.0"
        SERVER = WSGIServer((bind_addr, PROXY_PORT), APP)
        SERVER.serve_forever()
|