2021-02-24 06:42:58 +00:00
import logging , math , re , requests
2021-01-20 21:37:59 +00:00
from lxml import html
from modules import util
from modules . util import Failed
from retrying import retry
# Module-level logger; uses the application-wide "Plex Meta Manager" logger name.
logger = logging.getLogger("Plex Meta Manager")
2021-03-30 05:50:53 +00:00
# Collection builder method names handled by IMDb.get_items.
builders = ["imdb_list", "imdb_id"]
2021-06-14 15:24:11 +00:00
class IMDb:
    """Fetch IMDb IDs from IMDb list/search/keyword pages and convert them.

    Pages are downloaded with ``requests`` and parsed with ``lxml``; IMDb IDs
    are converted to TMDb/TVDb IDs through ``config.Convert``.
    """

    def __init__(self, config):
        """Store *config* and the URL prefixes this class accepts."""
        self.config = config
        self.urls = {
            "list": "https://www.imdb.com/list/ls",
            "search": "https://www.imdb.com/search/title/?",
            "keyword": "https://www.imdb.com/search/keyword/?",
        }

    @staticmethod
    def _title_count(text):
        """Return the integer that precedes the word "title" in *text*.

        Thousands separators are removed first so that "1,234 titles" yields
        1234 rather than 234. Returns None when no count is present.
        """
        found = re.search(r"(\d+) title", text.replace(",", ""))
        return int(found.group(1)) if found else None

    def validate_imdb_url(self, imdb_url, language):
        """Return the stripped *imdb_url* if it is supported and has results.

        Raises:
            Failed: if the URL does not start with one of the accepted
                prefixes, or the page reports no titles.
        """
        imdb_url = imdb_url.strip()
        accepted = (self.urls["list"], self.urls["search"], self.urls["keyword"])
        if not imdb_url.startswith(accepted):
            raise Failed(
                f"IMDb Error: {imdb_url} must begin with either:\n"
                f"{self.urls['list']} (For Lists)\n"
                f"{self.urls['search']} (For Searches)\n"
                f"{self.urls['keyword']} (For Keyword Searches)"
            )
        total, _ = self._total(self._fix_url(imdb_url), language)
        if total > 0:
            return imdb_url
        raise Failed(f"IMDb Error: {imdb_url} failed to parse")

    def _fix_url(self, imdb_url):
        """Normalize *imdb_url* into the form used for scraping.

        List URLs are rewritten as a title search restricted to that list;
        any other URL only has a single trailing "/" removed.

        Raises:
            Failed: if a list URL contains no numeric list ID.
        """
        if imdb_url.startswith(self.urls["list"]):
            found = re.search(r"(\d+)", str(imdb_url))
            if found is None:
                raise Failed(f"IMDb Error: Failed to parse List ID from {imdb_url}")
            return f"{self.urls['search']}lists=ls{found.group(1)}"
        if imdb_url.endswith("/"):
            return imdb_url[:-1]
        return imdb_url

    def _total(self, imdb_url, language):
        """Return ``(total_titles, items_per_page)`` for *imdb_url*.

        Keyword searches are paged 50 at a time, all other URLs 250.

        Raises:
            Failed: if the page has no parsable result count.
        """
        header = {"Accept-Language": language}
        page = self._request(imdb_url, header)

        if imdb_url.startswith(self.urls["keyword"]):
            total = None
            # The count sits in one of several text nodes; take the first hit.
            for text in page.xpath("//div[@class='desc']/text()"):
                total = self._title_count(text)
                if total is not None:
                    break
            if total is None:
                raise Failed(f"IMDb Error: No Results at URL: {imdb_url}")
            return total, 50

        summary = page.xpath("//div[@class='desc']/span/text()")
        if not summary:
            raise Failed(f"IMDb Error: Failed to parse URL: {imdb_url}")
        total = self._title_count(summary[0])
        if total is None:
            raise Failed(f"IMDb Error: No Results at URL: {imdb_url}")
        return total, 250

    def _ids_from_url(self, imdb_url, language, limit):
        """Return up to *limit* IMDb IDs scraped from *imdb_url*.

        A *limit* below 1, or above the number of available titles, means
        "everything the URL offers".

        Raises:
            Failed: if no IDs could be collected.
        """
        current_url = self._fix_url(imdb_url)
        total, item_count = self._total(current_url, language)
        header = {"Accept-Language": language}
        is_keyword = imdb_url.startswith(self.urls["keyword"])
        id_xpath = "//div[contains(@class, 'lister-item-image')]//a/img//@data-tconst"

        # Drop any paging parameters; they are re-added for every request.
        for param in ("start", "count", "page"):
            current_url = re.sub(rf"&{param}=\d+", "", current_url)

        if limit < 1 or total < limit:
            limit = total

        # Size of the final page; a full page when limit divides evenly.
        remainder = limit % item_count or item_count
        num_of_pages = math.ceil(int(limit) / item_count)

        imdb_ids = []
        for i in range(1, num_of_pages + 1):
            last_page = i == num_of_pages
            start_num = (i - 1) * item_count + 1
            end_num = limit if last_page else i * item_count
            util.print_return(f"Parsing Page {i}/{num_of_pages} {start_num}-{end_num}")
            if is_keyword:
                response = self._request(f"{current_url}&page={i}", header)
            else:
                count = remainder if last_page else item_count
                response = self._request(f"{current_url}&count={count}&start={start_num}", header)
            page_ids = response.xpath(id_xpath)
            if is_keyword and last_page:
                # Keyword pages take no count parameter, so trim client-side.
                page_ids = page_ids[:remainder]
            imdb_ids.extend(page_ids)
        util.print_end()

        if not imdb_ids:
            raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}")
        return imdb_ids

    @retry(stop_max_attempt_number=6, wait_fixed=10000)
    def _request(self, url, header):
        """GET *url* with *header* and return the parsed lxml HTML tree.

        Retried up to 6 attempts with a fixed 10000 ms wait between them.
        """
        return html.fromstring(requests.get(url, headers=header).content)

    def get_items(self, method, data, language, is_movie):
        """Resolve a builder request into ``(movie_ids, show_ids)``.

        Args:
            method: "imdb_id" or "imdb_list".
            data: the IMDb ID for "imdb_id"; a mapping with "url" and "limit"
                keys for "imdb_list".
            language: value sent as the Accept-Language header.
            is_movie: when true, the TVDb lookup is skipped.

        Returns:
            A tuple of two lists: TMDb IDs and TVDb IDs.

        Raises:
            Failed: if *method* is not supported.
        """
        pretty = util.pretty_names[method] if method in util.pretty_names else method
        show_ids = []
        movie_ids = []

        def run_convert(imdb_id):
            # TVDb is tried first for shows; TMDb only when no TVDb ID exists.
            tvdb_id = self.config.Convert.imdb_to_tvdb(imdb_id) if not is_movie else None
            tmdb_id = self.config.Convert.imdb_to_tmdb(imdb_id) if tvdb_id is None else None
            if not tmdb_id and not tvdb_id:
                logger.error(f"Convert Error: No {'' if is_movie else 'TVDb ID or '}TMDb ID found for IMDb: {imdb_id}")
            if tmdb_id:
                movie_ids.append(tmdb_id)
            if tvdb_id:
                show_ids.append(tvdb_id)

        if method == "imdb_id":
            logger.info(f"Processing {pretty}: {data}")
            run_convert(data)
        elif method == "imdb_list":
            status = f"{data['limit']} Items at " if data['limit'] > 0 else ''
            logger.info(f"Processing {pretty}: {status}{data['url']}")
            imdb_ids = self._ids_from_url(data["url"], language, data["limit"])
            total_ids = len(imdb_ids)
            for i, imdb in enumerate(imdb_ids, 1):
                util.print_return(f"Converting IMDb ID {i}/{total_ids}")
                run_convert(imdb)
            logger.info(util.adjust_space(f"Processed {total_ids} IMDb IDs"))
        else:
            raise Failed(f"IMDb Error: Method {method} not supported")

        logger.debug("")
        logger.debug(f"TMDb IDs Found: {movie_ids}")
        logger.debug(f"TVDb IDs Found: {show_ids}")
        return movie_ids, show_ids