2022-02-13 16:33:57 +00:00
|
|
|
import time
|
2021-03-04 20:05:51 +00:00
|
|
|
from modules import util
|
|
|
|
from modules.util import Failed
|
|
|
|
|
2022-02-13 16:33:57 +00:00
|
|
|
# Module-level alias for the logger object exposed by the util module, so the
# methods below can call logger.debug / info / error / ghost directly.
logger = util.logger
|
2021-03-04 20:05:51 +00:00
|
|
|
|
2021-03-30 05:50:53 +00:00
|
|
|
# Builder names associated with this module.
# NOTE(review): presumably consumed by code outside this file to route builder
# attributes here; within this file only "letterboxd_list" is dispatched on
# (see Letterboxd.get_tmdb_ids) — confirm how "letterboxd_list_details" is handled.
builders = ["letterboxd_list", "letterboxd_list_details"]
|
2021-07-14 14:47:20 +00:00
|
|
|
# Root URL of the Letterboxd site. List URLs must start with this prefix, and
# relative hrefs/slugs scraped from pages are appended to it to form full URLs.
base_url = "https://letterboxd.com"
|
2021-03-30 05:50:53 +00:00
|
|
|
|
2021-06-14 15:24:11 +00:00
|
|
|
class Letterboxd:
    """Scrape Letterboxd list pages and resolve their films to TMDb movie IDs.

    Every page is fetched through ``config.get_html`` and queried with XPath
    (the returned object is assumed to expose ``.xpath``). When
    ``config.Cache`` is truthy it is used to remember Letterboxd-ID to
    TMDb-ID mappings between runs.
    """

    def __init__(self, config):
        """Store the shared ``config`` object used for requests, tracing and caching."""
        self.config = config

    def _parse_list(self, list_url, language):
        """Return ``(letterboxd_id, slug)`` tuples for every film in a list.

        Starts at ``list_url`` and keeps following the page's ``a.next`` link
        until a page has none, sleeping two seconds before each additional
        page request.
        """
        collected = []
        page_url = list_url
        while True:
            if self.config.trace_mode:
                logger.debug(f"URL: {page_url}")
            page = self.config.get_html(page_url, headers=util.header(language))
            film_ids = page.xpath("//li[contains(@class, 'poster-container')]/div/@data-film-id")
            for film_id in film_ids:
                # Look the slug up by film id; the first match is used.
                slug_matches = page.xpath(f"//div[@data-film-id='{film_id}']/@data-film-slug")
                collected.append((film_id, slug_matches[0]))
            next_links = page.xpath("//a[@class='next']/@href")
            if len(next_links) == 0:
                break
            # Pause between page requests before following the pagination link.
            time.sleep(2)
            page_url = f"{base_url}{next_links[0]}"
        return collected

    def _tmdb(self, letterboxd_url, language):
        """Return the TMDb movie ID linked from a Letterboxd film page.

        Raises:
            Failed: if the page has no non-empty TMDb link, or if that link
                does not point at ``themoviedb.org/movie``.
        """
        if self.config.trace_mode:
            logger.debug(f"URL: {letterboxd_url}")
        page = self.config.get_html(letterboxd_url, headers=util.header(language))
        links = page.xpath("//a[@data-track-action='TMDb']/@href")
        if len(links) == 0 or not links[0]:
            raise Failed(f"Letterboxd Error: TMDb Movie ID not found at {letterboxd_url}")
        tmdb_link = links[0]
        if "themoviedb.org/movie" not in tmdb_link:
            raise Failed(f"Letterboxd Error: TMDb Movie ID not found in {tmdb_link}")
        return util.regex_first_int(tmdb_link, "TMDb Movie ID")

    def get_list_description(self, list_url, language):
        """Return the list page's ``og:description`` meta content, or ``None`` if absent or empty."""
        if self.config.trace_mode:
            logger.debug(f"URL: {list_url}")
        page = self.config.get_html(list_url, headers=util.header(language))
        found = page.xpath("//meta[@property='og:description']/@content")
        if len(found) > 0 and len(found[0]) > 0:
            return found[0]
        return None

    def validate_letterboxd_lists(self, letterboxd_lists, language):
        """Check each list URL and return the stripped URLs that parsed successfully.

        ``letterboxd_lists`` is first passed through
        ``util.get_list(..., split=False)``.

        Raises:
            Failed: if a URL does not start with ``base_url`` or if parsing it
                yields no items.
        """
        accepted = []
        for entry in util.get_list(letterboxd_lists, split=False):
            url = entry.strip()
            if not url.startswith(base_url):
                raise Failed(f"Letterboxd Error: {url} must begin with: {base_url}")
            # A list is only considered valid when at least one film can be scraped from it.
            if len(self._parse_list(url, language)) == 0:
                raise Failed(f"Letterboxd Error: {url} failed to parse")
            accepted.append(url)
        return accepted

    def get_tmdb_ids(self, method, data, language):
        """Resolve the films of a Letterboxd list to ``(tmdb_id, "tmdb")`` tuples.

        Only ``method == "letterboxd_list"`` is handled; ``data`` is the list
        URL. Films whose TMDb ID cannot be resolved are logged and skipped.

        Raises:
            Failed: if ``method`` is not supported or the list has no items.
        """
        if method != "letterboxd_list":
            raise Failed(f"Letterboxd Error: Method {method} not supported")

        logger.info(f"Processing Letterboxd List: {data}")
        entries = self._parse_list(data, language)
        total_items = len(entries)
        if total_items == 0:
            raise Failed(f"Letterboxd Error: No List Items found in {data}")

        resolved = []
        for position, (letterboxd_id, slug) in enumerate(entries, 1):
            logger.ghost(f"Finding TMDb ID {position}/{total_items}")
            tmdb_id, expired = None, None
            if self.config.Cache:
                tmdb_id, expired = self.config.Cache.query_letterboxd_map(letterboxd_id)
            # A cached value is reused only when it exists and is explicitly not expired.
            is_fresh = tmdb_id and expired is False
            if not is_fresh:
                try:
                    tmdb_id = self._tmdb(f"{base_url}{slug}", language)
                except Failed as err:
                    logger.error(err)
                    continue
                if self.config.Cache:
                    self.config.Cache.update_letterboxd_map(expired, letterboxd_id, tmdb_id)
            resolved.append((tmdb_id, "tmdb"))
        logger.info(f"Processed {total_items} TMDb IDs")
        return resolved