added letterboxd cache #102

This commit is contained in:
meisnate12 2021-03-12 14:49:23 -05:00
parent 97f7bea2fe
commit 24401def89
2 changed files with 62 additions and 17 deletions

View file

@ -36,6 +36,13 @@ class Cache:
expiration_date TEXT,
media_type TEXT)"""
)
cursor.execute(
"""CREATE TABLE IF NOT EXISTS letterboxd_map (
INTEGER PRIMARY KEY,
letterboxd_id TEXT UNIQUE,
tmdb_id TEXT,
expiration_date TEXT)"""
)
cursor.execute(
"""CREATE TABLE IF NOT EXISTS omdb_data (
INTEGER PRIMARY KEY,
@ -176,6 +183,29 @@ class Cache:
cursor.execute("INSERT OR IGNORE INTO imdb_map(imdb_id) VALUES(?)", (imdb_id,))
cursor.execute("UPDATE imdb_map SET t_id = ?, expiration_date = ?, media_type = ? WHERE imdb_id = ?", (t_id, expiration_date.strftime("%Y-%m-%d"), media_type, imdb_id))
def query_letterboxd_map(self, letterboxd_id):
    """Look up the cached TMDb ID for a Letterboxd film ID.

    Args:
        letterboxd_id: Value matched against the ``letterboxd_id`` column
            of the ``letterboxd_map`` table.

    Returns:
        A ``(tmdb_id, expired)`` tuple. When a row with a non-empty
        ``tmdb_id`` exists, ``tmdb_id`` is an ``int`` and ``expired`` is a
        ``bool``; otherwise both are ``None``.
    """
    tmdb_id = None
    expired = None
    # sqlite3's own connection context manager only commits/rolls back and
    # leaves the connection open; closing() releases the handle explicitly.
    with closing(sqlite3.connect(self.cache_path)) as connection:
        connection.row_factory = sqlite3.Row
        with closing(connection.cursor()) as cursor:
            cursor.execute("SELECT * FROM letterboxd_map WHERE letterboxd_id = ?", (letterboxd_id, ))
            row = cursor.fetchone()
            if row and row["tmdb_id"]:
                # The stored date is treated as a timestamp: the entry is
                # expired once its age in days exceeds self.expiration.
                stored_date = datetime.strptime(row["expiration_date"], "%Y-%m-%d")
                age = datetime.now() - stored_date
                tmdb_id = int(row["tmdb_id"])
                expired = age.days > self.expiration
    return tmdb_id, expired
def update_letterboxd(self, expired, letterboxd_id, tmdb_id):
    """Insert or refresh the Letterboxd ID -> TMDb ID row in the cache.

    Args:
        expired: When exactly ``True`` the row is stamped with today's
            date; for any other value it is stamped with a date between 1
            and ``self.expiration`` days in the past.
        letterboxd_id: Key of the row in ``letterboxd_map``.
        tmdb_id: TMDb ID to store for that key.
    """
    now = datetime.now()
    if expired is True:
        expiration_date = now
    else:
        # Random back-dating; presumably this staggers when new entries
        # expire so they are not all refreshed in one run -- TODO confirm.
        expiration_date = now - timedelta(days=random.randint(1, self.expiration))
    # closing() releases the connection (sqlite3's context manager does not),
    # while the inner `with connection` still commits the two statements.
    with closing(sqlite3.connect(self.cache_path)) as connection:
        with connection:
            connection.row_factory = sqlite3.Row
            with closing(connection.cursor()) as cursor:
                # Ensure the row exists, then overwrite its payload columns.
                cursor.execute("INSERT OR IGNORE INTO letterboxd_map(letterboxd_id) VALUES(?)", (letterboxd_id,))
                cursor.execute("UPDATE letterboxd_map SET tmdb_id = ?, expiration_date = ? WHERE letterboxd_id = ?", (tmdb_id, expiration_date.strftime("%Y-%m-%d"), letterboxd_id))
def query_omdb(self, imdb_id):
omdb_dict = {}
expired = None

View file

@ -7,8 +7,9 @@ from retrying import retry
logger = logging.getLogger("Plex Meta Manager")
class LetterboxdAPI:
def __init__(self):
def __init__(self, Cache=None):
self.url = "https://letterboxd.com"
self.Cache = Cache
@retry(stop_max_attempt_number=6, wait_fixed=10000)
def send_request(self, url, language):
@ -18,22 +19,28 @@ class LetterboxdAPI:
descriptions = self.send_request(list_url, language).xpath("//meta[@property='og:description']/@content")
return descriptions[0] if len(descriptions) > 0 and len(descriptions[0]) > 0 else None
def parse_list_for_slugs(self, list_url, language):
def parse_list(self, list_url, language):
response = self.send_request(list_url, language)
slugs = response.xpath("//div[@class='poster film-poster really-lazy-load']/@data-film-slug")
letterboxd_ids = response.xpath("//div[@class='poster film-poster really-lazy-load']/@data-film-id")
items = []
for letterboxd_id in letterboxd_ids:
slugs = response.xpath(f"//div[@data-film-id='{letterboxd_id}']/@data-film-slug")
items.append((letterboxd_id, slugs[0]))
next_url = response.xpath("//a[@class='next']/@href")
if len(next_url) > 0:
slugs.extend(self.parse_list_for_slugs(f"{self.url}{next_url[0]}", language))
return slugs
items.extend(self.parse_list(f"{self.url}{next_url[0]}", language))
return items
def get_tmdb_from_slug(self, slug, language):
    """Build a film URL from ``slug`` and delegate to ``self.get_tmdb``.

    Args:
        slug: Path fragment appended directly to ``self.url``.
        language: Passed through unchanged to ``self.get_tmdb``.

    Returns:
        Whatever ``self.get_tmdb`` returns for the constructed URL.
    """
    film_url = f"{self.url}{slug}"
    return self.get_tmdb(film_url, language)
def get_tmdb(self, letterboxd_url, language):
response = self.send_request(letterboxd_url, language)
ids = response.xpath("//body/@data-tmdb-id")
ids = response.xpath("//a[@data-track-action='TMDb']/@href")
if len(ids) > 0 and ids[0]:
return int(ids[0])
if "themoviedb.org/movie" in ids[0]:
return util.regex_first_int(ids[0], "TMDB Movie ID")
raise Failed(f"Letterboxd Error: TMDb Movie ID not found in {ids[0]}")
raise Failed(f"Letterboxd Error: TMDb Movie ID not found at {letterboxd_url}")
def get_items(self, method, data, language, status_message=True):
@ -41,18 +48,26 @@ class LetterboxdAPI:
movie_ids = []
if status_message:
logger.info(f"Processing {pretty}: {data}")
slugs = self.parse_list_for_slugs(data, language)
total_slugs = len(slugs)
if total_slugs == 0:
items = self.parse_list(data, language)
total_items = len(items)
if total_items == 0:
raise Failed(f"Letterboxd Error: No List Items found in {data}")
length = 0
for i, slug in enumerate(slugs, 1):
length = util.print_return(length, f"Finding TMDb ID {i}/{total_slugs}")
try:
movie_ids.append(self.get_tmdb_from_slug(slug, language))
except Failed as e:
logger.error(e)
util.print_end(length, f"Processed {total_slugs} TMDb IDs")
for i, item in enumerate(items, 1):
length = util.print_return(length, f"Finding TMDb ID {i}/{total_items}")
tmdb_id = None
expired = None
if self.Cache:
tmdb_id, expired = self.Cache.query_letterboxd_map(item[0])
if not tmdb_id or expired is not False:
try:
tmdb_id = self.get_tmdb_from_slug(item[1], language)
if self.Cache:
self.Cache.update_letterboxd(expired, item[0], tmdb_id)
except Failed as e:
logger.error(e)
movie_ids.append(tmdb_id)
util.print_end(length, f"Processed {total_items} TMDb IDs")
if status_message:
logger.debug(f"TMDb IDs Found: {movie_ids}")
return movie_ids, []