[tubitv] Fix/improve TV series extraction (#2829)

Authored by: bbepis
This commit is contained in:
Bepis 2022-02-19 23:00:51 +11:00 committed by GitHub
parent ed66a17ef0
commit febff4c119
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 2 deletions

View file

@@ -107,6 +107,9 @@ class TubiTvIE(InfoExtractor):
'url': self._proto_relative_url(sub_url), 'url': self._proto_relative_url(sub_url),
}) })
season_number, episode_number, episode_title = self._search_regex(
r'^S(\d+):E(\d+) - (.+)', title, 'episode info', fatal=False, group=(1, 2, 3), default=(None, None, None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
@@ -117,6 +120,9 @@ class TubiTvIE(InfoExtractor):
'duration': int_or_none(video_data.get('duration')), 'duration': int_or_none(video_data.get('duration')),
'uploader_id': video_data.get('publisher_id'), 'uploader_id': video_data.get('publisher_id'),
'release_year': int_or_none(video_data.get('year')), 'release_year': int_or_none(video_data.get('year')),
'season_number': int_or_none(season_number),
'episode_number': int_or_none(episode_number),
'episode_title': episode_title
} }
@@ -132,9 +138,11 @@ class TubiTvShowIE(InfoExtractor):
def _entries(self, show_url, show_name): def _entries(self, show_url, show_name):
show_webpage = self._download_webpage(show_url, show_name) show_webpage = self._download_webpage(show_url, show_name)
show_json = self._parse_json(self._search_regex( show_json = self._parse_json(self._search_regex(
r"window\.__data\s*=\s*({.+?});\s*</script>", r'window\.__data\s*=\s*({[^<]+});\s*</script>',
show_webpage, 'data',), show_name, transform_source=js_to_json)['video'] show_webpage, 'data'), show_name, transform_source=js_to_json)['video']
for episode_id in show_json['fullContentById'].keys(): for episode_id in show_json['fullContentById'].keys():
yield self.url_result( yield self.url_result(
'tubitv:%s' % episode_id, 'tubitv:%s' % episode_id,

View file

@@ -3143,6 +3143,8 @@ def js_to_json(code, vars={}):
return '"%s"' % v return '"%s"' % v
code = re.sub(r'new Date\((".+")\)', r'\g<1>', code)
return re.sub(r'''(?sx) return re.sub(r'''(?sx)
"(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"| "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
'(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'| '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|