Plex-Meta-Manager/modules/tvdb.py

283 lines
15 KiB
Python
Raw Normal View History

2024-05-28 20:22:51 +00:00
import re, time
from datetime import datetime
2024-01-04 20:24:53 +00:00
from lxml import html
2022-01-28 16:11:09 +00:00
from lxml.etree import ParserError
2021-01-20 21:37:59 +00:00
from modules import util
from modules.util import Failed
2024-05-28 20:22:51 +00:00
from requests.exceptions import MissingSchema
from tenacity import retry, stop_after_attempt, wait_fixed, retry_if_not_exception_type
2021-01-20 21:37:59 +00:00
logger = util.logger
2021-01-20 21:37:59 +00:00
2021-07-23 19:44:21 +00:00
# Collection builder names handled by this module.
builders = [
    "tvdb_list",
    "tvdb_list_details",
    "tvdb_movie",
    "tvdb_movie_details",
    "tvdb_show",
    "tvdb_show_details",
]

# TheTVDB links appear both with and without the "www." prefix; the "alt_*"
# entries below let URL checks accept either form.
base_url = "https://www.thetvdb.com"
alt_url = "https://thetvdb.com"

# URL prefixes used to classify user-supplied links and to build lookups by ID.
urls = {
    "list": base_url + "/lists/",
    "alt_list": alt_url + "/lists/",
    "series": base_url + "/series/",
    "alt_series": alt_url + "/series/",
    "movies": base_url + "/movies/",
    "alt_movies": alt_url + "/movies/",
    # The dereferrer endpoints take a numeric TVDb ID appended to the prefix.
    "series_id": base_url + "/dereferrer/series/",
    "movie_id": base_url + "/dereferrer/movie/",
}
2021-11-06 03:54:12 +00:00
# Maps two-letter language codes to three-letter codes. TVDbObj uses it as a
# fallback: when no title is found under the configured language code, the page
# is searched again for a `data-language` attribute equal to the mapped value.
# NOTE(review): the pairs look like ISO 639-1 -> ISO 639-3 — confirm before
# extending the table.
language_translation = {
"ab": "abk", "aa": "aar", "af": "afr", "ak": "aka", "sq": "sqi", "am": "amh", "ar": "ara", "an": "arg", "hy": "hye",
"as": "asm", "av": "ava", "ae": "ave", "ay": "aym", "az": "aze", "bm": "bam", "ba": "bak", "eu": "eus", "be": "bel",
"bn": "ben", "bi": "bis", "bs": "bos", "br": "bre", "bg": "bul", "my": "mya", "ca": "cat", "ch": "cha", "ce": "che",
"ny": "nya", "zh": "zho", "cv": "chv", "kw": "cor", "co": "cos", "cr": "cre", "hr": "hrv", "cs": "ces", "da": "dan",
"dv": "div", "nl": "nld", "dz": "dzo", "en": "eng", "eo": "epo", "et": "est", "ee": "ewe", "fo": "fao", "fj": "fij",
"fi": "fin", "fr": "fra", "ff": "ful", "gl": "glg", "ka": "kat", "de": "deu", "el": "ell", "gn": "grn", "gu": "guj",
"ht": "hat", "ha": "hau", "he": "heb", "hz": "her", "hi": "hin", "ho": "hmo", "hu": "hun", "ia": "ina", "id": "ind",
"ie": "ile", "ga": "gle", "ig": "ibo", "ik": "ipk", "io": "ido", "is": "isl", "it": "ita", "iu": "iku", "ja": "jpn",
"jv": "jav", "kl": "kal", "kn": "kan", "kr": "kau", "ks": "kas", "kk": "kaz", "km": "khm", "ki": "kik", "rw": "kin",
"ky": "kir", "kv": "kom", "kg": "kon", "ko": "kor", "ku": "kur", "kj": "kua", "la": "lat", "lb": "ltz", "lg": "lug",
"li": "lim", "ln": "lin", "lo": "lao", "lt": "lit", "lu": "lub", "lv": "lav", "gv": "glv", "mk": "mkd", "mg": "mlg",
"ms": "msa", "ml": "mal", "mt": "mlt", "mi": "mri", "mr": "mar", "mh": "mah", "mn": "mon", "na": "nau", "nv": "nav",
"nd": "nde", "ne": "nep", "ng": "ndo", "nb": "nob", "nn": "nno", "no": "nor", "ii": "iii", "nr": "nbl", "oc": "oci",
"oj": "oji", "cu": "chu", "om": "orm", "or": "ori", "os": "oss", "pa": "pan", "pi": "pli", "fa": "fas", "pl": "pol",
"ps": "pus", "pt": "por", "qu": "que", "rm": "roh", "rn": "run", "ro": "ron", "ru": "rus", "sa": "san", "sc": "srd",
"sd": "snd", "se": "sme", "sm": "smo", "sg": "sag", "sr": "srp", "gd": "gla", "sn": "sna", "si": "sin", "sk": "slk",
"sl": "slv", "so": "som", "st": "sot", "es": "spa", "su": "sun", "sw": "swa", "ss": "ssw", "sv": "swe", "ta": "tam",
"te": "tel", "tg": "tgk", "th": "tha", "ti": "tir", "bo": "bod", "tk": "tuk", "tl": "tgl", "tn": "tsn", "to": "ton",
"tr": "tur", "ts": "tso", "tt": "tat", "tw": "twi", "ty": "tah", "ug": "uig", "uk": "ukr", "ur": "urd", "uz": "uzb",
"ve": "ven", "vi": "vie", "vo": "vol", "wa": "wln", "cy": "cym", "wo": "wol", "fy": "fry", "xh": "xho", "yi": "yid",
"yo": "yor", "za": "zha", "zu": "zul"}
2021-03-30 05:50:53 +00:00
2021-01-20 21:37:59 +00:00
class TVDbObj:
    """Metadata for a single TVDb movie or series.

    The values come from the TVDb cache when a usable row exists, otherwise
    they are scraped from the item's TVDb page.

    Attributes set by the constructor:
        tvdb_id, is_movie, ignore_cache: the constructor arguments.
        title, summary: localized text (see the language fallback below).
        poster_url, background_url: first matching artwork link, or None.
        release_date: parsed "Released" / "First Aired" value, or None.
        status: text of the "Status" field, or None.
        genres: list of genre names (empty when there are none).
    """

    def __init__(self, tvdb, tvdb_id, is_movie=False, ignore_cache=False):
        """Load the item from cache or from the TVDb website.

        Args:
            tvdb: owning TVDb instance; supplies ``cache``, ``expiration``,
                ``language`` and ``get_request``.
            tvdb_id: numeric TVDb ID of the item.
            is_movie: True for a movie, False for a series.
            ignore_cache: when True the cache is neither read nor written.

        Raises:
            Failed: if the page cannot be fetched or no title can be found.
        """
        self._tvdb = tvdb
        self.tvdb_id = tvdb_id
        self.is_movie = is_movie
        self.ignore_cache = ignore_cache

        expired = None
        data = None
        if self._tvdb.cache and not ignore_cache:
            data, expired = self._tvdb.cache.query_tvdb(tvdb_id, is_movie, self._tvdb.expiration)

        # No cached row, or the row is stale: fetch the page instead. From here
        # on `data` is either the cached dict or the parsed HTML tree.
        if expired or not data:
            item_url = f"{urls['movie_id' if is_movie else 'series_id']}{tvdb_id}"
            try:
                data = self._tvdb.get_request(item_url)
            except Failed:
                raise Failed(f"TVDb Error: No {'Movie' if is_movie else 'Series'} found for TVDb ID: {tvdb_id} at {item_url}")

        def parse_page(xpath, is_list=False):
            """Run `xpath` on the page; return all stripped hits or just the first (None if no hit)."""
            found = [r.strip() for r in data.xpath(xpath) if len(r) > 0]
            if is_list:
                return found
            return found[0] if found else None

        def parse_title_summary(lang=None):
            """Return (title, summary) for `lang`, or for the visible translation when `lang` is None."""
            place = "//div[@class='change_translation_text' and "
            place += f"@data-language='{lang}']" if lang else "not(@style='display:none')]"
            return parse_page(f"{place}/@data-title"), parse_page(f"{place}/p/text()[normalize-space()]")

        if isinstance(data, dict):
            self.title = data["title"]
            self.summary = data["summary"]
            self.poster_url = data["poster_url"]
            self.background_url = data["background_url"]
            self.release_date = data["release_date"]
            self.status = data["status"]
            # "".split("|") is [""], which would make an item without genres
            # look like it has one empty genre; drop empty entries so the cached
            # path agrees with the scraped path (an empty list).
            self.genres = [genre for genre in data["genres"].split("|") if genre]
        else:
            # Language fallback: configured code, then its three-letter
            # translation, then whichever translation the page shows by default.
            self.title, self.summary = parse_title_summary(lang=self._tvdb.language)
            if not self.title and self._tvdb.language in language_translation:
                self.title, self.summary = parse_title_summary(lang=language_translation[self._tvdb.language])
            if not self.title:
                self.title, self.summary = parse_title_summary()
            if not self.title:
                raise Failed(f"TVDb Error: Name not found from TVDb ID: {self.tvdb_id}")

            self.poster_url = parse_page("//div[@id='artwork-posters']/div/div/a/@href")
            self.background_url = parse_page("//div[@id='artwork-backgrounds']/div/div/a/@href")
            if is_movie:
                released = parse_page("//strong[text()='Released']/parent::li/span/text()[normalize-space()]")
            else:
                released = parse_page("//strong[text()='First Aired']/parent::li/span/text()[normalize-space()]")
            try:
                self.release_date = datetime.strptime(released, "%B %d, %Y") if released else released  # noqa
            except ValueError:
                # The date was present but not in "Month D, YYYY" form.
                self.release_date = None
            self.status = parse_page("//strong[text()='Status']/parent::li/span/text()[normalize-space()]")
            self.genres = parse_page("//strong[text()='Genres']/parent::li/span/a/text()[normalize-space()]", is_list=True)

        # NOTE(review): placed after both branches (the scraped source lost its
        # indentation) — confirm this write-back is not meant to run only for
        # freshly scraped items. `expired` is passed on exactly as query_tvdb
        # returned it.
        if self._tvdb.cache and not ignore_cache:
            self._tvdb.cache.update_tvdb(expired, self, self._tvdb.expiration)
2021-01-20 21:37:59 +00:00
2021-06-14 15:24:11 +00:00
class TVDb:
    """Scrapes TheTVDB website pages to resolve IDs, list contents and item metadata."""

    def __init__(self, requests, cache, tvdb_language, expiration):
        """Store the collaborators used by every lookup.

        Args:
            requests: project request helper exposing ``get`` and ``get_html``.
            cache: cache object, or a falsy value to disable caching.
            tvdb_language: language code sent with requests and used when
                picking a page translation.
            expiration: value handed to the cache on every query/update.
        """
        self.requests = requests
        self.cache = cache
        self.language = tvdb_language
        self.expiration = expiration

    def get_tvdb_obj(self, tvdb_url, is_movie=False):
        """Resolve `tvdb_url` (URL or numeric ID) and return the matching TVDbObj."""
        tvdb_id, _, _ = self.get_id_from_url(tvdb_url, is_movie=is_movie)
        return TVDbObj(self, tvdb_id, is_movie=is_movie)

    # Up to 6 attempts, 10 seconds apart; a Failed (HTTP status >= 400) is not retried.
    @retry(stop=stop_after_attempt(6), wait=wait_fixed(10), retry=retry_if_not_exception_type(Failed))
    def get_request(self, tvdb_url):
        """Fetch `tvdb_url` and return the parsed HTML tree.

        Raises:
            Failed: when the response status code is 400 or higher.
        """
        response = self.requests.get(tvdb_url, language=self.language)
        if response.status_code >= 400:
            raise Failed(f"({response.status_code}) {response.reason}")
        return html.fromstring(response.content)

    def get_id_from_url(self, tvdb_url, is_movie=False, ignore_cache=False):
        """Resolve a TVDb URL or numeric ID to ``(tvdb_id, tmdb_id, imdb_id)``.

        A numeric series ID is returned as-is with ``None`` for the other two
        values. A numeric movie ID is turned into a dereferrer URL and looked
        up, because movies are reported by their TMDb/IMDb ID. Series URLs are
        served from the URL -> ID cache when possible.

        Raises:
            Failed: for an unsupported URL, an unreadable page, a page with no
                TVDb ID, or a movie page with neither a TMDb nor an IMDb link.
        """
        try:
            if not is_movie:
                return int(tvdb_url), None, None
            else:
                tvdb_url = f"{urls['movie_id']}{int(tvdb_url)}"
        except ValueError:
            # Not numeric: treat the value as a URL below.
            pass

        tvdb_url = tvdb_url.strip()
        if tvdb_url.startswith((urls["series"], urls["alt_series"], urls["series_id"])):
            media_type = "Series"
        elif tvdb_url.startswith((urls["movies"], urls["alt_movies"], urls["movie_id"])):
            media_type = "Movie"
        else:
            raise Failed(f"TVDb Error: {tvdb_url} must begin with {urls['movies']} or {urls['series']}")

        expired = None
        # The URL map only yields a TVDb ID, which is not enough for movies.
        if self.cache and not ignore_cache and not is_movie:
            tvdb_id, expired = self.cache.query_tvdb_map(tvdb_url, self.expiration)
            if tvdb_id and not expired:
                return tvdb_id, None, None

        logger.trace(f"URL: {tvdb_url}")
        try:
            response = self.get_request(tvdb_url)
        except (ParserError, Failed):
            raise Failed(f"TVDb Error: Failed to parse {tvdb_url}")

        results = response.xpath(f"//*[text()='TheTVDB.com {media_type} ID']/parent::node()/span/text()")
        if len(results) > 0:
            tvdb_id = int(results[0])
            tmdb_id = None
            imdb_id = None
            if media_type == "Movie":
                # Either external link may be missing or unparsable; only the
                # absence of both is an error.
                results = response.xpath("//*[text()='TheMovieDB.com']/@href")
                if len(results) > 0:
                    try:
                        tmdb_id = util.regex_first_int(results[0], "TMDb ID")
                    except Failed:
                        pass
                results = response.xpath("//*[text()='IMDB']/@href")
                if len(results) > 0:
                    try:
                        imdb_id = util.get_id_from_imdb_url(results[0])
                    except Failed:
                        pass
                if tmdb_id is None and imdb_id is None:
                    raise Failed("TVDb Error: No TMDb ID or IMDb ID found")
            if self.cache and not ignore_cache and not is_movie:
                self.cache.update_tvdb_map(expired, tvdb_url, tvdb_id, self.expiration)
            return tvdb_id, tmdb_id, imdb_id
        elif tvdb_url.startswith(urls["movie_id"]):
            err_text = f"using TVDb Movie ID: {tvdb_url[len(urls['movie_id']):]}"
        elif tvdb_url.startswith(urls["series_id"]):
            err_text = f"using TVDb Series ID: {tvdb_url[len(urls['series_id']):]}"
        else:
            err_text = f"ID at the URL {tvdb_url}"
        raise Failed(f"TVDb Error: Could not find a TVDb {media_type} {err_text}")

    def get_list_description(self, tvdb_url):
        """Return ``(description, poster)`` scraped from a TVDb list page; either may be None."""
        response = self.requests.get_html(tvdb_url, language=self.language)
        description = response.xpath("//div[@class='block']/div[not(@style='display:none')]/p/text()")
        description = description[0] if len(description) > 0 and len(description[0]) > 0 else None
        poster = response.xpath("//div[@id='artwork']/div/div/a/@href")
        poster = poster[0] if len(poster) > 0 and len(poster[0]) > 0 else None
        return description, poster

    def _ids_from_url(self, tvdb_url):
        """Return ``(id, source)`` tuples for every resolvable item on a TVDb list page.

        Series yield ``(tvdb_id, "tvdb")``; movies yield ``(tmdb_id, "tmdb")``
        or, failing that, ``(imdb_id, "imdb")``. Items that cannot be resolved
        are logged and skipped.

        Raises:
            Failed: if the URL is not a list URL, the lookup fails, or no item
                on the list could be resolved.
        """
        ids = []
        tvdb_url = tvdb_url.strip()
        logger.trace(f"URL: {tvdb_url}")
        if tvdb_url.startswith((urls["list"], urls["alt_list"])):
            try:
                response = self.requests.get_html(tvdb_url, language=self.language)
                items = response.xpath("//div[@id='general']//div/div/h3/a")
                for item in items:
                    title = item.xpath("text()")[0]
                    item_url = item.xpath("@href")[0]
                    if item_url.startswith("/series/"):
                        try:
                            tvdb_id, _, _ = self.get_id_from_url(f"{base_url}{item_url}")
                            if tvdb_id:
                                ids.append((tvdb_id, "tvdb"))
                        except Failed as e:
                            logger.error(f"{e} for series {title}")
                    elif item_url.startswith("/movies/"):
                        try:
                            _, tmdb_id, imdb_id = self.get_id_from_url(f"{base_url}{item_url}", is_movie=True)
                            if tmdb_id:
                                ids.append((tmdb_id, "tmdb"))
                            elif imdb_id:
                                ids.append((imdb_id, "imdb"))
                        except Failed as e:
                            logger.error(f"{e} for movie {title}")
                    else:
                        logger.error(f"TVDb Error: Skipping Movie: {title}")
                    # Fixed pause after every list item before moving on to the next.
                    time.sleep(2)
                if len(ids) > 0:
                    return ids
                raise Failed(f"TVDb Error: No TVDb IDs found at {tvdb_url}")
            except MissingSchema:
                logger.stacktrace()
                raise Failed(f"TVDb Error: URL Lookup Failed for {tvdb_url}")
        else:
            raise Failed(f"TVDb Error: {tvdb_url} must begin with {urls['list']}")

    def get_tvdb_ids(self, method, data):
        """Return ``(id, source)`` tuples for a ``tvdb_show``, ``tvdb_movie`` or ``tvdb_list`` builder.

        Lookup failures for a single show or movie are logged and produce an
        empty list; list failures propagate from ``_ids_from_url``.

        Raises:
            Failed: if `method` is not one of the three supported builders.
        """
        if method == "tvdb_show":
            logger.info(f"Processing TVDb Show: {data}")
            ids = []
            try:
                tvdb_id, _, _ = self.get_id_from_url(data)
                if tvdb_id:
                    ids.append((tvdb_id, "tvdb"))
            except Failed as e:
                logger.error(e)
            return ids
        elif method == "tvdb_movie":
            logger.info(f"Processing TVDb Movie: {data}")
            ids = []
            try:
                # is_movie=True matches the movie lookup in _ids_from_url.
                # Without it a numeric ID (or a URL already in the series URL
                # map) comes back as (tvdb_id, None, None) and the movie is
                # silently dropped.
                _, tmdb_id, imdb_id = self.get_id_from_url(data, is_movie=True)
                if tmdb_id:
                    ids.append((tmdb_id, "tmdb"))
                elif imdb_id:
                    ids.append((imdb_id, "imdb"))
            except Failed as e:
                logger.error(e)
            return ids
        elif method == "tvdb_list":
            logger.info(f"Processing TVDb List: {data}")
            return self._ids_from_url(data)
        else:
            raise Failed(f"TVDb Error: Method {method} not supported")

    def item_filter(self, item, filter_attr, modifier, filter_final, filter_data):
        """Return False when `item` is rejected by the given TVDb filter, True otherwise.

        Args:
            item: object exposing ``title``, ``status`` and ``genres``.
            filter_attr: ``tvdb_title``, ``tvdb_status`` or ``tvdb_genre``;
                any other value passes.
            modifier: filter suffix such as ``""``, ``".not"``, ``".regex"``
                or ``".count_gt"``.
            filter_final: not used by this method.
            filter_data: values to compare against.
        """
        if filter_attr == "tvdb_title":
            if util.is_string_filter([item.title], modifier, filter_data):
                return False
        elif filter_attr == "tvdb_status":
            if util.is_string_filter([item.status], modifier, filter_data):
                return False
        elif filter_attr == "tvdb_genre":
            # Treat a missing genre list as empty so every branch below is safe.
            attrs = item.genres or []
            if modifier == ".regex":
                # Passes as soon as any pattern matches any genre.
                if not any(re.compile(reg).search(name) for reg in filter_data for name in attrs):
                    return False
            elif modifier in [".count_gt", ".count_gte", ".count_lt", ".count_lte"]:
                # ".count_gt" -> ".gt": compare the number of genres.
                if util.is_number_filter(len(attrs), f".{modifier[7:]}", filter_data):
                    return False
            else:
                overlap = set(filter_data) & set(attrs)
                if (not overlap and modifier == "") or (overlap and modifier == ".not"):
                    return False
        return True