2021-07-14 14:47:20 +00:00
import logging , math , re , time
2021-01-20 21:37:59 +00:00
from modules import util
from modules . util import Failed
2021-07-29 02:26:39 +00:00
from urllib . parse import urlparse , parse_qs
2021-01-20 21:37:59 +00:00
# Logger is looked up by a fixed name (not ``__name__``).
logger = logging.getLogger("Plex Meta Manager")

# Builder method names that IMDb.get_imdb_ids knows how to process.
builders = ["imdb_list", "imdb_id"]

base_url = "https://www.imdb.com"

# URL prefixes, keyed by page type. A user-supplied URL must start with one of
# these to be accepted, and the matching key selects the XPath / page size.
urls = {
    "list": f"{base_url}/list/ls",
    "search": f"{base_url}/search/title/",
    "keyword": f"{base_url}/search/keyword/",
}

# XPath expressions evaluated against fetched pages:
#   "imdb_id"                 -> data-tconst attribute of each result's image
#   "list"/"search"/"keyword" -> text nodes scanned for "<number> title" to
#                                obtain the total result count
xpath = {
    "imdb_id": "//div[contains(@class, 'lister-item-image')]//a/img//@data-tconst",
    "list": "//div[@class='desc lister-total-num-results']/text()",
    "search": "//div[@class='desc']/span/text()",
    "keyword": "//div[@class='desc']/text()",
}

# Items per page used in the pagination arithmetic, keyed by page type.
item_counts = {"list": 100, "search": 250, "keyword": 50}
2021-03-30 05:50:53 +00:00
2021-06-14 15:24:11 +00:00
class IMDb:
    """Collect IMDb IDs from IMDb list, title-search and keyword-search URLs.

    Pages are fetched through ``config.get_html`` and queried with the
    module-level ``xpath`` expressions; URL prefixes and page sizes come from
    the module-level ``urls`` and ``item_counts`` tables.
    """

    # Captures the number directly in front of " title" in result-count text.
    _TOTAL_RE = re.compile(r"(\d+) title")

    def __init__(self, config):
        """Store *config*; its ``get_html`` method is used for every fetch."""
        self.config = config

    @staticmethod
    def _parse_total(results):
        """Return the first "<number> title" count found in *results*, else 0.

        Only entries containing "title" are examined, and commas are removed
        before matching so that "1,234 titles" yields 1234.
        """
        for result in results:
            if "title" not in result:
                continue
            match = IMDb._TOTAL_RE.search(result.replace(",", ""))
            if match:
                return int(match.group(1))
        return 0

    @staticmethod
    def _page_plan(limit, total, item_count):
        """Return ``(effective_limit, last_page_size, num_of_pages)``.

        A *limit* below 1 or above *total* is replaced by *total*. The last
        page size is ``limit % item_count``, or a full page when that is 0.
        """
        if limit < 1 or total < limit:
            limit = total
        remainder = limit % item_count or item_count
        num_of_pages = math.ceil(int(limit) / item_count)
        return limit, remainder, num_of_pages

    def validate_imdb_lists(self, imdb_lists, language):
        """Validate ``imdb_list`` entries and normalize them to dicts.

        Each entry is either a dict (keys matched case-insensitively through
        ``util.parse``) or a bare value that is treated as the URL. Every URL
        must start with one of the prefixes in ``urls`` and must yield a
        positive result count from ``_total`` (which fetches the page).

        Args:
            imdb_lists: One entry or a collection of entries; expanded with
                ``util.get_list(..., split=False)``.
            language: Passed on to ``_total`` for building request headers.

        Returns:
            A list of ``{"url": <stripped url>, "limit": <limit>}`` dicts,
            where limit is 0 when no "limit" key was supplied.

        Raises:
            Failed: If a URL has an unsupported prefix, or ``_total`` cannot
                find a result count on the page.
        """
        valid_lists = []
        for imdb_dict in util.get_list(imdb_lists, split=False):
            if not isinstance(imdb_dict, dict):
                imdb_dict = {"url": imdb_dict}
            # Map lower-cased key -> key as written, for case-insensitive lookup.
            dict_methods = {dm.lower(): dm for dm in imdb_dict}
            imdb_url = util.parse("url", imdb_dict, methods=dict_methods, parent="imdb_list").strip()
            if not imdb_url.startswith((urls["list"], urls["search"], urls["keyword"])):
                raise Failed(
                    f"IMDb Error: {imdb_url} must begin with either:\n"
                    f"{urls['list']} (For Lists)\n"
                    f"{urls['search']} (For Searches)\n"
                    f"{urls['keyword']} (For Keyword Searches)"
                )
            # Called only for its validation side effect: raises Failed when
            # the page has no parseable result count.
            self._total(imdb_url, language)
            if "limit" in dict_methods:
                list_count = util.parse(
                    "limit", imdb_dict, datatype="int", methods=dict_methods,
                    default=0, parent="imdb_list", minimum=0,
                )
            else:
                list_count = 0
            valid_lists.append({"url": imdb_url, "limit": list_count})
        return valid_lists

    def _total(self, imdb_url, language):
        """Fetch *imdb_url* and return ``(total_results, items_per_page)``.

        The page type is chosen by URL prefix: keyword first, then list, and
        anything else is treated as a title search.

        Raises:
            Failed: If no positive "<number> title" count is found.
        """
        headers = util.header(language)
        if imdb_url.startswith(urls["keyword"]):
            page_type = "keyword"
        elif imdb_url.startswith(urls["list"]):
            page_type = "list"
        else:
            page_type = "search"
        results = self.config.get_html(imdb_url, headers=headers).xpath(xpath[page_type])
        total = self._parse_total(results)
        if total > 0:
            return total, item_counts[page_type]
        raise Failed(f"IMDb Error: Failed to parse URL: {imdb_url}")

    def _ids_from_url(self, imdb_url, language, limit):
        """Page through *imdb_url* and return the IMDb IDs found.

        Args:
            imdb_url: List, search or keyword URL; any existing ``start``,
                ``count`` and ``page`` query parameters are discarded.
            language: Used to build the request headers.
            limit: Maximum number of IDs wanted; values below 1 or above the
                page's reported total mean "all results".

        Returns:
            A non-empty list of the values selected by ``xpath["imdb_id"]``.

        Raises:
            Failed: If the total cannot be parsed or no IDs are found.
        """
        total, item_count = self._total(imdb_url, language)
        headers = util.header(language)

        parsed_url = urlparse(imdb_url)
        params = parse_qs(parsed_url.query)
        # Strip the query string; the parameters are sent separately below.
        imdb_base = parsed_url._replace(query="").geturl()
        for paging_key in ("start", "count", "page"):
            params.pop(paging_key, None)

        limit, remainder, num_of_pages = self._page_plan(limit, total, item_count)
        # List and keyword URLs are paged by page number; search URLs by
        # start offset plus count.
        paged_by_number = imdb_base.startswith((urls["list"], urls["keyword"]))

        imdb_ids = []
        for i in range(1, num_of_pages + 1):
            is_last_page = i == num_of_pages
            start_num = (i - 1) * item_count + 1
            util.print_return(f"Parsing Page {i}/{num_of_pages} {start_num}-{limit if is_last_page else i * item_count}")
            if paged_by_number:
                params["page"] = i
            else:
                params["count"] = remainder if is_last_page else item_count
                params["start"] = start_num
            ids_found = self.config.get_html(imdb_base, headers=headers, params=params).xpath(xpath["imdb_id"])
            if paged_by_number and is_last_page:
                # No count parameter is sent for these pages, so trim the
                # final page to the requested limit here.
                ids_found = ids_found[:remainder]
            imdb_ids.extend(ids_found)
            # Pause after every page request, including the last one.
            time.sleep(2)
        util.print_end()
        if imdb_ids:
            logger.debug(f"{len(imdb_ids)} IMDb IDs Found: {imdb_ids}")
            return imdb_ids
        raise Failed(f"IMDb Error: No IMDb IDs Found at {imdb_url}")

    def get_imdb_ids(self, method, data, language):
        """Return ``[(imdb_id, "imdb"), ...]`` for a builder *method*.

        Args:
            method: ``"imdb_id"`` or ``"imdb_list"``.
            data: For ``"imdb_id"``, the ID itself; for ``"imdb_list"``, a
                dict with ``"url"`` and ``"limit"`` keys (the shape produced
                by ``validate_imdb_lists``).
            language: Used to build request headers for list scraping.

        Raises:
            Failed: If *method* is not supported, or list scraping fails.
        """
        if method == "imdb_id":
            logger.info(f"Processing IMDb ID: {data}")
            return [(data, "imdb")]
        if method == "imdb_list":
            status = f"{data['limit']} Items at " if data["limit"] > 0 else ""
            logger.info(f"Processing IMDb List: {status}{data['url']}")
            return [(i, "imdb") for i in self._ids_from_url(data["url"], language, data["limit"])]
        raise Failed(f"IMDb Error: Method {method} not supported")