Update to ytdl-commit-3be0980

3be098010f
2024-12-13 23:02:34 +00:00 · 2021-03-15 04:52:06 +05:30 · 2021-03-15 04:52:06 +05:30 · 10db0d2f57
commit 10db0d2f57
parent 7275535116
17 changed files with 356 additions and 253 deletions
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1794,14 +1794,18 @@ class YoutubeDL(object):
        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

-        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
-            # see http://bugs.python.org/issue1646728)
-            try:
-                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
-                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
-            except (ValueError, OverflowError, OSError):
-                pass
+        for ts_key, date_key in (
+                ('timestamp', 'upload_date'),
+                ('release_timestamp', 'release_date'),
+        ):
+            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+                # see http://bugs.python.org/issue1646728)
+                try:
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
+                except (ValueError, OverflowError, OSError):
+                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
--- a/yt_dlp/extractor/applepodcasts.py
+++ b/yt_dlp/extractor/applepodcasts.py
@@ -42,6 +42,7 @@ class ApplePodcastsIE(InfoExtractor):
        ember_data = self._parse_json(self._search_regex(
            r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
            webpage, 'ember data'), episode_id)
+        ember_data = ember_data.get(episode_id) or ember_data
        episode = ember_data['data']['attributes']
        description = episode.get('description') or {}

--- a/yt_dlp/extractor/bandcamp.py
+++ b/yt_dlp/extractor/bandcamp.py
@@ -49,6 +49,7 @@ class BandcampIE(InfoExtractor):
            'uploader': 'Ben Prunty',
            'timestamp': 1396508491,
            'upload_date': '20140403',
+            'release_timestamp': 1396483200,
            'release_date': '20140403',
            'duration': 260.877,
            'track': 'Lanius (Battle)',
@@ -69,6 +70,7 @@ class BandcampIE(InfoExtractor):
            'uploader': 'Mastodon',
            'timestamp': 1322005399,
            'upload_date': '20111122',
+            'release_timestamp': 1076112000,
            'release_date': '20040207',
            'duration': 120.79,
            'track': 'Hail to Fire',
@@ -197,7 +199,7 @@ class BandcampIE(InfoExtractor):
            'thumbnail': thumbnail,
            'uploader': artist,
            'timestamp': timestamp,
-            'release_date': unified_strdate(tralbum.get('album_release_date')),
+            'release_timestamp': unified_timestamp(tralbum.get('album_release_date')),
            'duration': duration,
            'track': track,
            'track_number': track_number,
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -138,11 +138,6 @@ class BiliBiliIE(InfoExtractor):
        anime_id = mobj.group('anime_id')
        page_id = mobj.group('page')
        webpage = self._download_webpage(url, video_id)
-        headers = {
-            'Referer': url,
-            'Accept': '*/*'
-        }
-        headers.update(self.geo_verification_headers())

        if 'anime/' not in url:
            cid = self._search_regex(
@@ -160,8 +155,12 @@ class BiliBiliIE(InfoExtractor):
            if 'no_bangumi_tip' not in smuggled_data:
                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % (
                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
+            headers = {
+                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
+                'Referer': url
+            }
+            headers.update(self.geo_verification_headers())

-            headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
            js = self._download_json(
                'http://bangumi.bilibili.com/web_api/get_source', video_id,
                data=urlencode_postdata({'episode_id': video_id}),
@@ -170,6 +169,12 @@ class BiliBiliIE(InfoExtractor):
                self._report_error(js)
            cid = js['result']['cid']

+        headers = {
+            'Accept': 'application/json',
+            'Referer': url
+        }
+        headers.update(self.geo_verification_headers())
+
        entries = []

        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
--- a/yt_dlp/extractor/cbs.py
+++ b/yt_dlp/extractor/cbs.py
@@ -27,7 +27,7 @@ class CBSBaseIE(ThePlatformFeedIE):


 class CBSIE(CBSBaseIE):
-    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs\.com|paramountplus\.com)/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'
+    _VALID_URL = r'(?:cbs:|https?://(?:www\.)?(?:(?:cbs|paramountplus)\.com/shows/[^/]+/video|colbertlateshow\.com/(?:video|podcasts))/)(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.cbs.com/shows/garth-brooks/video/_u7W953k6la293J7EPTd9oHkSPs6Xn6_/connect-chat-feat-garth-brooks/',
@@ -53,7 +53,7 @@ class CBSIE(CBSBaseIE):
        'url': 'http://www.colbertlateshow.com/podcasts/dYSwjqPs_X1tvbV_P2FcPWRa_qT6akTC/in-the-bad-room-with-stephen/',
        'only_matching': True,
    }, {
-        'url': 'https://www.paramountplus.com/shows/star-trek-discovery/video/l5ANMH9wM7kxwV1qr4u1xn88XOhYMlZX/star-trek-discovery-the-vulcan-hello/',
+        'url': 'https://www.paramountplus.com/shows/all-rise/video/QmR1WhNkh1a_IrdHZrbcRklm176X_rVc/all-rise-space/',
        'only_matching': True,
    }]

--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -231,8 +231,9 @@ class InfoExtractor(object):
    uploader:       Full name of the video uploader.
    license:        License name the video is licensed under.
    creator:        The creator of the video.
+    release_timestamp: UNIX timestamp of the moment the video was released.
    release_date:   The date (YYYYMMDD) when the video was released.
-    timestamp:      UNIX timestamp of the moment the video became available.
+    timestamp:      UNIX timestamp of the moment the video was uploaded
    upload_date:    Video upload date (YYYYMMDD).
                    If not explicitly set, calculated from timestamp.
    uploader_id:    Nickname or id of the video uploader.
--- a/yt_dlp/extractor/fujitv.py
+++ b/yt_dlp/extractor/fujitv.py
@@ -17,7 +17,7 @@ class FujiTVFODPlus7IE(InfoExtractor):
    def _real_extract(self, url):
        video_id = self._match_id(url)
        formats = self._extract_m3u8_formats(
-            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id)
+            self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id, 'mp4')
        for f in formats:
            wh = self._BITRATE_MAP.get(f.get('tbr'))
            if wh:
--- a/yt_dlp/extractor/lbry.py
+++ b/yt_dlp/extractor/lbry.py
@@ -6,8 +6,10 @@ import json

 from .common import InfoExtractor
 from ..compat import (
+    compat_parse_qs,
    compat_str,
    compat_urllib_parse_unquote,
+    compat_urllib_parse_urlparse,
 )
 from ..utils import (
    determine_ext,
@@ -62,6 +64,7 @@ class LBRYBaseIE(InfoExtractor):
            'description': stream_value.get('description'),
            'license': stream_value.get('license'),
            'timestamp': int_or_none(stream.get('timestamp')),
+            'release_timestamp': int_or_none(stream_value.get('release_time')),
            'tags': stream_value.get('tags'),
            'duration': int_or_none(media.get('duration')),
            'channel': try_get(signing_channel, lambda x: x['value']['title']),
@@ -94,6 +97,8 @@ class LBRYIE(LBRYBaseIE):
            'description': 'md5:f6cb5c704b332d37f5119313c2c98f51',
            'timestamp': 1595694354,
            'upload_date': '20200725',
+            'release_timestamp': 1595340697,
+            'release_date': '20200721',
            'width': 1280,
            'height': 720,
        }
@@ -108,6 +113,8 @@ class LBRYIE(LBRYBaseIE):
            'description': 'md5:661ac4f1db09f31728931d7b88807a61',
            'timestamp': 1591312601,
            'upload_date': '20200604',
+            'release_timestamp': 1591312421,
+            'release_date': '20200604',
            'tags': list,
            'duration': 2570,
            'channel': 'The LBRY Foundation',
@@ -189,17 +196,18 @@ class LBRYChannelIE(LBRYBaseIE):
    }]
    _PAGE_SIZE = 50

-    def _fetch_page(self, claim_id, url, page):
+    def _fetch_page(self, claim_id, url, params, page):
        page += 1
+        page_params = {
+            'channel_ids': [claim_id],
+            'claim_type': 'stream',
+            'no_totals': True,
+            'page': page,
+            'page_size': self._PAGE_SIZE,
+        }
+        page_params.update(params)
        result = self._call_api_proxy(
-            'claim_search', claim_id, {
-                'channel_ids': [claim_id],
-                'claim_type': 'stream',
-                'no_totals': True,
-                'page': page,
-                'page_size': self._PAGE_SIZE,
-                'stream_types': self._SUPPORTED_STREAM_TYPES,
-            }, 'page %d' % page)
+            'claim_search', claim_id, page_params, 'page %d' % page)
        for item in (result.get('items') or []):
            stream_claim_name = item.get('name')
            stream_claim_id = item.get('claim_id')
@@ -220,8 +228,31 @@ class LBRYChannelIE(LBRYBaseIE):
        result = self._resolve_url(
            'lbry://' + display_id, display_id, 'channel')
        claim_id = result['claim_id']
+        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
+        content = qs.get('content', [None])[0]
+        params = {
+            'fee_amount': qs.get('fee_amount', ['>=0'])[0],
+            'order_by': {
+                'new': ['release_time'],
+                'top': ['effective_amount'],
+                'trending': ['trending_group', 'trending_mixed'],
+            }[qs.get('order', ['new'])[0]],
+            'stream_types': [content] if content in ['audio', 'video'] else self._SUPPORTED_STREAM_TYPES,
+        }
+        duration = qs.get('duration', [None])[0]
+        if duration:
+            params['duration'] = {
+                'long': '>=1200',
+                'short': '<=240',
+            }[duration]
+        language = qs.get('language', ['all'])[0]
+        if language != 'all':
+            languages = [language]
+            if language == 'en':
+                languages.append('none')
+            params['any_languages'] = languages
        entries = OnDemandPagedList(
-            functools.partial(self._fetch_page, claim_id, url),
+            functools.partial(self._fetch_page, claim_id, url, params),
            self._PAGE_SIZE)
        result_value = result.get('value') or {}
        return self.playlist_result(
--- a/yt_dlp/extractor/peertube.py
+++ b/yt_dlp/extractor/peertube.py
@@ -599,11 +599,13 @@ class PeerTubeIE(InfoExtractor):
        else:
            age_limit = None

+        webpage_url = 'https://%s/videos/watch/%s' % (host, video_id)
+
        return {
            'id': video_id,
            'title': title,
            'description': description,
-            'thumbnail': urljoin(url, video.get('thumbnailPath')),
+            'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName', compat_str),
            'uploader_id': str_or_none(account_data('id', int)),
@@ -621,5 +623,6 @@ class PeerTubeIE(InfoExtractor):
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
-            'subtitles': subtitles
+            'subtitles': subtitles,
+            'webpage_url': webpage_url,
        }
--- a/yt_dlp/extractor/pinterest.py
+++ b/yt_dlp/extractor/pinterest.py
@@ -31,6 +31,7 @@ class PinterestBaseIE(InfoExtractor):

        title = (data.get('title') or data.get('grid_title') or video_id).strip()

+        urls = []
        formats = []
        duration = None
        if extract_formats:
@@ -38,8 +39,9 @@ class PinterestBaseIE(InfoExtractor):
                if not isinstance(format_dict, dict):
                    continue
                format_url = url_or_none(format_dict.get('url'))
-                if not format_url:
+                if not format_url or format_url in urls:
                    continue
+                urls.append(format_url)
                duration = float_or_none(format_dict.get('duration'), scale=1000)
                ext = determine_ext(format_url)
                if 'hls' in format_id.lower() or ext == 'm3u8':
--- a/yt_dlp/extractor/pornhub.py
+++ b/yt_dlp/extractor/pornhub.py
@@ -167,6 +167,7 @@ class PornHubIE(PornHubBaseIE):
        'params': {
            'skip_download': True,
        },
+        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
    }, {
        # subtitles
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
@@ -265,7 +266,8 @@ class PornHubIE(PornHubBaseIE):
        webpage = dl_webpage('pc')

        error_msg = self._html_search_regex(
-            r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+            (r'(?s)<div[^>]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P<error>.+?)</div>',
+             r'(?s)<section[^>]+class=["\']noVideo["\'][^>]*>(?P<error>.+?)</section>'),
            webpage, 'error message', default=None, group='error')
        if error_msg:
            error_msg = re.sub(r'\s+', ' ', error_msg)
@@ -394,6 +396,21 @@ class PornHubIE(PornHubBaseIE):

        upload_date = None
        formats = []
+
+        def add_format(format_url, height=None):
+            tbr = None
+            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', format_url)
+            if mobj:
+                if not height:
+                    height = int(mobj.group('height'))
+                tbr = int(mobj.group('tbr'))
+            formats.append({
+                'url': format_url,
+                'format_id': '%dp' % height if height else None,
+                'height': height,
+                'tbr': tbr,
+            })
+
        for video_url, height in video_urls:
            if not upload_date:
                upload_date = self._search_regex(
@@ -410,18 +427,19 @@ class PornHubIE(PornHubBaseIE):
                    video_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue
-            tbr = None
-            mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url)
-            if mobj:
-                if not height:
-                    height = int(mobj.group('height'))
-                tbr = int(mobj.group('tbr'))
-            formats.append({
-                'url': video_url,
-                'format_id': '%dp' % height if height else None,
-                'height': height,
-                'tbr': tbr,
-            })
+            if '/video/get_media' in video_url:
+                medias = self._download_json(video_url, video_id, fatal=False)
+                if isinstance(medias, list):
+                    for media in medias:
+                        if not isinstance(media, dict):
+                            continue
+                        video_url = url_or_none(media.get('videoUrl'))
+                        if not video_url:
+                            continue
+                        height = int_or_none(media.get('quality'))
+                        add_format(video_url, height)
+                continue
+            add_format(video_url)
        self._sort_formats(formats)

        video_uploader = self._html_search_regex(
--- a/yt_dlp/extractor/rtve.py
+++ b/yt_dlp/extractor/rtve.py
@@ -2,8 +2,9 @@
 from __future__ import unicode_literals

 import base64
+import io
 import re
-import time
+import sys

 from .common import InfoExtractor
 from ..compat import (
@@ -14,56 +15,13 @@ from ..utils import (
    determine_ext,
    ExtractorError,
    float_or_none,
+    qualities,
    remove_end,
    remove_start,
-    sanitized_Request,
    std_headers,
 )

-
-def _decrypt_url(png):
-    encrypted_data = compat_b64decode(png)
-    text_index = encrypted_data.find(b'tEXt')
-    text_chunk = encrypted_data[text_index - 4:]
-    length = compat_struct_unpack('!I', text_chunk[:4])[0]
-    # Use bytearray to get integers when iterating in both python 2.x and 3.x
-    data = bytearray(text_chunk[8:8 + length])
-    data = [chr(b) for b in data if b != 0]
-    hash_index = data.index('#')
-    alphabet_data = data[:hash_index]
-    url_data = data[hash_index + 1:]
-    if url_data[0] == 'H' and url_data[3] == '%':
-        # remove useless HQ%% at the start
-        url_data = url_data[4:]
-
-    alphabet = []
-    e = 0
-    d = 0
-    for l in alphabet_data:
-        if d == 0:
-            alphabet.append(l)
-            d = e = (e + 1) % 4
-        else:
-            d -= 1
-    url = ''
-    f = 0
-    e = 3
-    b = 1
-    for letter in url_data:
-        if f == 0:
-            l = int(letter) * 10
-            f = 1
-        else:
-            if e == 0:
-                l += int(letter)
-                url += alphabet[l]
-                e = (b + 3) % 4
-                f = 0
-                b += 1
-            else:
-                e -= 1
-
-    return url
+_bytes_to_chr = (lambda x: x) if sys.version_info[0] == 2 else (lambda x: map(chr, x))


 class RTVEALaCartaIE(InfoExtractor):
@@ -79,28 +37,31 @@ class RTVEALaCartaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
            'duration': 5024.566,
+            'series': 'Balonmano',
        },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'note': 'Live stream',
        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
        'info_dict': {
            'id': '1694255',
-            'ext': 'flv',
-            'title': 'TODO',
+            'ext': 'mp4',
+            'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+            'is_live': True,
+        },
+        'params': {
+            'skip_download': 'live stream',
        },
-        'skip': 'The f4m manifest can\'t be used yet',
    }, {
        'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
-        'md5': 'e55e162379ad587e9640eda4f7353c0f',
+        'md5': 'd850f3c8731ea53952ebab489cf81cbf',
        'info_dict': {
            'id': '4236788',
            'ext': 'mp4',
-            'title': 'Servir y proteger - Capítulo 104 ',
+            'title': 'Servir y proteger - Capítulo 104',
            'duration': 3222.0,
        },
-        'params': {
-            'skip_download': True,  # requires ffmpeg
-        },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
        'only_matching': True,
@@ -111,58 +72,102 @@ class RTVEALaCartaIE(InfoExtractor):

    def _real_initialize(self):
        user_agent_b64 = base64.b64encode(std_headers['User-Agent'].encode('utf-8')).decode('utf-8')
-        manager_info = self._download_json(
+        self._manager = self._download_json(
            'http://www.rtve.es/odin/loki/' + user_agent_b64,
-            None, 'Fetching manager info')
-        self._manager = manager_info['manager']
+            None, 'Fetching manager info')['manager']
+
+    @staticmethod
+    def _decrypt_url(png):
+        encrypted_data = io.BytesIO(compat_b64decode(png)[8:])
+        while True:
+            length = compat_struct_unpack('!I', encrypted_data.read(4))[0]
+            chunk_type = encrypted_data.read(4)
+            if chunk_type == b'IEND':
+                break
+            data = encrypted_data.read(length)
+            if chunk_type == b'tEXt':
+                alphabet_data, text = data.split(b'\0')
+                quality, url_data = text.split(b'%%')
+                alphabet = []
+                e = 0
+                d = 0
+                for l in _bytes_to_chr(alphabet_data):
+                    if d == 0:
+                        alphabet.append(l)
+                        d = e = (e + 1) % 4
+                    else:
+                        d -= 1
+                url = ''
+                f = 0
+                e = 3
+                b = 1
+                for letter in _bytes_to_chr(url_data):
+                    if f == 0:
+                        l = int(letter) * 10
+                        f = 1
+                    else:
+                        if e == 0:
+                            l += int(letter)
+                            url += alphabet[l]
+                            e = (b + 3) % 4
+                            f = 0
+                            b += 1
+                        else:
+                            e -= 1
+
+                yield quality.decode(), url
+            encrypted_data.read(4)  # CRC
+
+    def _extract_png_formats(self, video_id):
+        png = self._download_webpage(
+            'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id),
+            video_id, 'Downloading url information', query={'q': 'v2'})
+        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
+        formats = []
+        for quality, video_url in self._decrypt_url(png):
+            ext = determine_ext(video_url)
+            if ext == 'm3u8':
+                formats.extend(self._extract_m3u8_formats(
+                    video_url, video_id, 'mp4', 'm3u8_native',
+                    m3u8_id='hls', fatal=False))
+            elif ext == 'mpd':
+                formats.extend(self._extract_mpd_formats(
+                    video_url, video_id, 'dash', fatal=False))
+            else:
+                formats.append({
+                    'format_id': quality,
+                    'quality': q(quality),
+                    'url': video_url,
+                })
+        self._sort_formats(formats)
+        return formats

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
        info = self._download_json(
            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
            video_id)['page']['items'][0]
        if info['state'] == 'DESPU':
            raise ExtractorError('The video is no longer available', expected=True)
-        title = info['title']
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/%s/videos/%s.png' % (self._manager, video_id)
-        png_request = sanitized_Request(png_url)
-        png_request.add_header('Referer', url)
-        png = self._download_webpage(png_request, video_id, 'Downloading url information')
-        video_url = _decrypt_url(png)
-        ext = determine_ext(video_url)
-
-        formats = []
-        if not video_url.endswith('.f4m') and ext != 'm3u8':
-            if '?' not in video_url:
-                video_url = video_url.replace('resources/', 'auth/resources/')
-            video_url = video_url.replace('.net.rtve', '.multimedia.cdn.rtve')
-
-        if ext == 'm3u8':
-            formats.extend(self._extract_m3u8_formats(
-                video_url, video_id, ext='mp4', entry_protocol='m3u8_native',
-                m3u8_id='hls', fatal=False))
-        elif ext == 'f4m':
-            formats.extend(self._extract_f4m_formats(
-                video_url, video_id, f4m_id='hds', fatal=False))
-        else:
-            formats.append({
-                'url': video_url,
-            })
-        self._sort_formats(formats)
+        title = info['title'].strip()
+        formats = self._extract_png_formats(video_id)

        subtitles = None
-        if info.get('sbtFile') is not None:
-            subtitles = self.extract_subtitles(video_id, info['sbtFile'])
+        sbt_file = info.get('sbtFile')
+        if sbt_file:
+            subtitles = self.extract_subtitles(video_id, sbt_file)
+
+        is_live = info.get('live') is True

        return {
            'id': video_id,
-            'title': title,
+            'title': self._live_title(title) if is_live else title,
            'formats': formats,
            'thumbnail': info.get('image'),
-            'page_url': url,
            'subtitles': subtitles,
-            'duration': float_or_none(info.get('duration'), scale=1000),
+            'duration': float_or_none(info.get('duration'), 1000),
+            'is_live': is_live,
+            'series': info.get('programTitle'),
        }

    def _get_subtitles(self, video_id, sub_file):
@@ -174,48 +179,26 @@ class RTVEALaCartaIE(InfoExtractor):
            for s in subs)


-class RTVEInfantilIE(InfoExtractor):
+class RTVEInfantilIE(RTVEALaCartaIE):
    IE_NAME = 'rtve.es:infantil'
    IE_DESC = 'RTVE infantil'
-    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/(?P<show>[^/]*)/video/(?P<short_title>[^/]*)/(?P<id>[0-9]+)/'
+    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'

    _TESTS = [{
        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
-        'md5': '915319587b33720b8e0357caaa6617e6',
+        'md5': '5747454717aedf9f9fdf212d1bcfc48d',
        'info_dict': {
            'id': '3040283',
            'ext': 'mp4',
            'title': 'Maneras de vivir',
-            'thumbnail': 'http://www.rtve.es/resources/jpg/6/5/1426182947956.JPG',
+            'thumbnail': r're:https?://.+/1426182947956\.JPG',
            'duration': 357.958,
        },
+        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }]

-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        info = self._download_json(
-            'http://www.rtve.es/api/videos/%s/config/alacarta_videos.json' % video_id,
-            video_id)['page']['items'][0]

-        webpage = self._download_webpage(url, video_id)
-        vidplayer_id = self._search_regex(
-            r' id="vidplayer([0-9]+)"', webpage, 'internal video ID')
-
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/default/videos/%s.png' % vidplayer_id
-        png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        video_url = _decrypt_url(png)
-
-        return {
-            'id': video_id,
-            'ext': 'mp4',
-            'title': info['title'],
-            'url': video_url,
-            'thumbnail': info.get('image'),
-            'duration': float_or_none(info.get('duration'), scale=1000),
-        }
-
-
-class RTVELiveIE(InfoExtractor):
+class RTVELiveIE(RTVEALaCartaIE):
    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'
@@ -225,7 +208,7 @@ class RTVELiveIE(InfoExtractor):
        'info_dict': {
            'id': 'la-1',
            'ext': 'mp4',
-            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2}Z[0-9]{6}$',
+            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            'skip_download': 'live stream',
@@ -234,29 +217,22 @@ class RTVELiveIE(InfoExtractor):

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
-        start_time = time.gmtime()
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
        title = remove_start(title, 'Estoy viendo ')
-        title += ' ' + time.strftime('%Y-%m-%dZ%H%M%S', start_time)

        vidplayer_id = self._search_regex(
            (r'playerId=player([0-9]+)',
             r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
             r'data-id=["\'](\d+)'),
            webpage, 'internal video ID')
-        png_url = 'http://www.rtve.es/ztnr/movil/thumbnail/amonet/videos/%s.png' % vidplayer_id
-        png = self._download_webpage(png_url, video_id, 'Downloading url information')
-        m3u8_url = _decrypt_url(png)
-        formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
-        self._sort_formats(formats)

        return {
            'id': video_id,
-            'title': title,
-            'formats': formats,
+            'title': self._live_title(title),
+            'formats': self._extract_png_formats(vidplayer_id),
            'is_live': True,
        }

--- a/yt_dlp/extractor/shahid.py
+++ b/yt_dlp/extractor/shahid.py
@@ -51,13 +51,16 @@ class ShahidIE(ShahidBaseIE):
    _NETRC_MACHINE = 'shahid'
    _VALID_URL = r'https?://shahid\.mbc\.net/ar/(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
    _TESTS = [{
-        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AC%D9%84%D8%B3-%D8%A7%D9%84%D8%B4%D8%A8%D8%A7%D8%A8-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-275286',
+        'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
        'info_dict': {
-            'id': '275286',
+            'id': '816924',
            'ext': 'mp4',
-            'title': 'مجلس الشباب الموسم 1 كليب 1',
-            'timestamp': 1506988800,
-            'upload_date': '20171003',
+            'title': 'متحف الدحيح الموسم 1 كليب 1',
+            'timestamp': 1602806400,
+            'upload_date': '20201016',
+            'description': 'برومو',
+            'duration': 22,
+            'categories': ['كوميديا'],
        },
        'params': {
            # m3u8 download
@@ -109,12 +112,15 @@ class ShahidIE(ShahidBaseIE):
            page_type = 'episode'

        playout = self._call_api(
-            'playout/url/' + video_id, video_id)['playout']
+            'playout/new/url/' + video_id, video_id)['playout']

        if not self._downloader.params.get('allow_unplayable_formats') and playout.get('drm'):
            raise ExtractorError('This video is DRM protected.', expected=True)

-        formats = self._extract_m3u8_formats(playout['url'], video_id, 'mp4')
+        formats = self._extract_m3u8_formats(re.sub(
+            # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
+            r'aws\.manifestfilter=[\w:;,-]+&?',
+            '', playout['url']), video_id, 'mp4')
        self._sort_formats(formats)

        # video = self._call_api(
--- a/yt_dlp/extractor/southpark.py
+++ b/yt_dlp/extractor/southpark.py
@ -6,9 +6,9 @@ from .mtv import MTVServicesInfoExtractor

 class SouthParkIE(MTVServicesInfoExtractor):
    IE_NAME = 'southpark.cc.com'
-    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark\.cc\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'
+    _VALID_URL = r'https?://(?:www\.)?(?P<url>southpark(?:\.cc|studios)\.com/(?:clips|(?:full-)?episodes|collections)/(?P<id>.+?)(\?|#|$))'

-    _FEED_URL = 'http://www.southparkstudios.com/feeds/video-player/mrss'
+    _FEED_URL = 'http://feeds.mtvnservices.com/od/feed/intl-mrss-player-feed'

    _TESTS = [{
        'url': 'http://southpark.cc.com/clips/104437/bat-daded#tab=featured',
@ -23,8 +23,20 @@ class SouthParkIE(MTVServicesInfoExtractor):
    }, {
        'url': 'http://southpark.cc.com/collections/7758/fan-favorites/1',
        'only_matching': True,
+    }, {
+        'url': 'https://www.southparkstudios.com/episodes/h4o269/south-park-stunning-and-brave-season-19-ep-1',
+        'only_matching': True,
    }]

+    def _get_feed_query(self, uri):
+        """Build the feed query for `uri`: fixed feed parameters plus mgid."""
+        shared_ep = 'shared.southpark.global'
+        query = {'accountOverride': 'intl.mtvi.com', 'arcEp': shared_ep}
+        query['ep'] = '90877963'
+        query['imageEp'] = shared_ep
+        query['mgid'] = uri
+        return query
+

 class SouthParkEsIE(SouthParkIE):
    IE_NAME = 'southpark.cc.com:español'
--- a/yt_dlp/extractor/sportdeutschland.py
+++ b/yt_dlp/extractor/sportdeutschland.py
@ -1,82 +1,128 @@
 # coding: utf-8
 from __future__ import unicode_literals

-import re
-
 from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
 from ..utils import (
+    clean_html,
+    ExtractorError,
+    float_or_none,
+    int_or_none,
    parse_iso8601,
-    sanitized_Request,
+    strip_or_none,
+    try_get,
 )


 class SportDeutschlandIE(InfoExtractor):
-    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<sport>[^/?#]+)/(?P<id>[^?#/]+)(?:$|[?#])'
+    # The id is the permalink path: an optional section segment plus the page slug
+    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
    _TESTS = [{
        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
        'info_dict': {
-            'id': 're-live-deutsche-meisterschaften-2020-halbfinals',
+            'id': '5318cac0275701382770543d7edaf0a0',
            'ext': 'mp4',
-            'title': 're:Re-live: Deutsche Meisterschaften 2020.*Halbfinals',
-            'categories': ['Badminton-Deutschland'],
-            'view_count': int,
-            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
-            'timestamp': int,
-            'upload_date': '20200201',
-            'description': 're:.*',  # meaningless description for THIS video
+            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
+            'duration': 16106.36,
        },
+        'params': {
+            'noplaylist': True,
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
+        'info_dict': {
+            'id': 'c6e2fdd01f63013854c47054d2ab776f',
+            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
+            'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
+            'duration': 31397,
+        },
+        'playlist_count': 2,
+    }, {
+        'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        sport_id = mobj.group('sport')
-
-        api_url = 'https://proxy.vidibusdynamic.net/ssl/backend.sportdeutschland.tv/api/permalinks/%s/%s?access_token=true' % (
-            sport_id, video_id)
-        req = sanitized_Request(api_url, headers={
-            'Accept': 'application/vnd.vidibus.v2.html+json',
-            'Referer': url,
-        })
-        data = self._download_json(req, video_id)
-
+        """Extract a sportdeutschland.tv asset as a single or a multi-part video.
+
+        An asset with several videos becomes a `multi_video` result; when the
+        URL carries a valid `playlistId` and --no-playlist is given, only that
+        part is extracted. An asset with one video becomes a plain video result.
+
+        Raises ExtractorError if the asset lists no videos.
+        """
+        display_id = self._match_id(url)
+        data = self._download_json(
+            'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
+            display_id, query={'access_token': 'true'})
        asset = data['asset']
-        categories = [data['section']['title']]
-
-        formats = []
-        smil_url = asset['video']
-        if '.smil' in smil_url:
-            m3u8_url = smil_url.replace('.smil', '.m3u8')
-            formats.extend(
-                self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'))
-
-            smil_doc = self._download_xml(
-                smil_url, video_id, note='Downloading SMIL metadata')
-            base_url_el = smil_doc.find('./head/meta')
-            if base_url_el:
-                base_url = base_url_el.attrib['base']
-            formats.extend([{
-                'format_id': 'rmtp',
-                'url': base_url if base_url_el else n.attrib['src'],
-                'play_path': n.attrib['src'],
-                'ext': 'flv',
-                'preference': -100,
-                'format_note': 'Seems to fail at example stream',
-            } for n in smil_doc.findall('./body/video')])
-        else:
-            formats.append({'url': smil_url})
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'formats': formats,
-            'title': asset['title'],
-            'thumbnail': asset.get('image'),
-            'description': asset.get('teaser'),
-            'duration': asset.get('duration'),
-            'categories': categories,
-            'view_count': asset.get('views'),
-            'rtmp_live': asset.get('live'),
-            'timestamp': parse_iso8601(asset.get('date')),
+        title = (asset.get('title') or asset['label']).strip()
+        asset_id = asset.get('id') or asset.get('uuid')
+        info = {
+            'id': asset_id,
+            'title': title,
+            'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
+            'duration': int_or_none(asset.get('seconds')),
        }
+        videos = asset.get('videos') or []
+        if not videos:
+            # Fail with a clear message instead of an IndexError on videos[0] below
+            raise ExtractorError('No videos found in asset %s' % display_id)
+        # Number used for the fallback 'Teil %d' label of the first yielded part
+        first_part = 1
+        if len(videos) > 1:
+            playlist_id = compat_parse_qs(compat_urllib_parse_urlparse(url).query).get('playlistId', [None])[0]
+            if playlist_id:
+                # playlistId comes from the URL, so it may be non-numeric or out of range
+                index = int_or_none(playlist_id)
+                if not self._downloader.params.get('noplaylist'):
+                    self.to_screen('Downloading playlist %s - add --no-playlist to just download video' % asset_id)
+                elif index is not None and 0 <= index < len(videos):
+                    videos = [videos[index]]
+                    first_part = index + 1
+                    self.to_screen('Downloading just a single video because of --no-playlist')
+                else:
+                    self.to_screen('Ignoring invalid playlistId %s - downloading the whole playlist' % playlist_id)
+
+            def entries():
+                for i, video in enumerate(videos, first_part):
+                    video_id = video.get('uuid')
+                    video_url = video.get('url')
+                    if not (video_id and video_url):
+                        continue
+                    formats = self._extract_m3u8_formats(
+                        video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
+                    if not formats:
+                        continue
+                    # Sort the formats before returning them, as other extractors do
+                    self._sort_formats(formats)
+                    yield {
+                        'id': video_id,
+                        'formats': formats,
+                        'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
+                        'duration': float_or_none(video.get('duration')),
+                    }
+            info.update({
+                '_type': 'multi_video',
+                'entries': entries(),
+            })
+        else:
+            formats = self._extract_m3u8_formats(
+                videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
+            self._sort_formats(formats)
+            section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
+            info.update({
+                'formats': formats,
+                'display_id': asset.get('permalink'),
+                'thumbnail': try_get(asset, lambda x: x['images'][0]),
+                'categories': [section_title] if section_title else None,
+                'view_count': int_or_none(asset.get('views')),
+                'is_live': asset.get('is_live') is True,
+                'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
+            })
+        return info
--- a/yt_dlp/extractor/tver.py
+++ b/yt_dlp/extractor/tver.py
@ -9,6 +9,7 @@ from ..utils import (
    int_or_none,
    remove_start,
    smuggle_url,
+    strip_or_none,
    try_get,
 )

@ -25,6 +26,10 @@ class TVerIE(InfoExtractor):
    }, {
        'url': 'https://tver.jp/episode/79622438',
        'only_matching': True,
+    }, {
+        # subtitle = ' '
+        'url': 'https://tver.jp/corner/f0068870',
+        'only_matching': True,
    }]
    _TOKEN = None
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
@ -47,8 +52,12 @@ class TVerIE(InfoExtractor):
        }

        if service == 'cx':
+            title = main['title']
+            subtitle = strip_or_none(main.get('subtitle'))
+            if subtitle:
+                title += ' - ' + subtitle
            info.update({
-                'title': main.get('subtitle') or main['title'],
+                'title': title,
                'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id),
                'ie_key': 'FujiTVFODPlus7',
            })
--- a/yt_dlp/extractor/voxmedia.py
+++ b/yt_dlp/extractor/voxmedia.py
@ -7,6 +7,8 @@ from ..compat import compat_urllib_parse_unquote
 from ..utils import (
    ExtractorError,
    int_or_none,
+    try_get,
+    unified_timestamp,
 )


@ -19,14 +21,17 @@ class VoxMediaVolumeIE(OnceIE):

        setup = self._parse_json(self._search_regex(
            r'setup\s*=\s*({.+});', webpage, 'setup'), video_id)
-        video_data = setup.get('video') or {}
+        player_setup = setup.get('player_setup') or setup
+        video_data = player_setup.get('video') or {}
+        formatted_metadata = video_data.get('formatted_metadata') or {}
        info = {
            'id': video_id,
-            'title': video_data.get('title_short'),
+            'title': player_setup.get('title') or video_data.get('title_short'),
            'description': video_data.get('description_long') or video_data.get('description_short'),
-            'thumbnail': video_data.get('brightcove_thumbnail')
+            'thumbnail': formatted_metadata.get('thumbnail') or video_data.get('brightcove_thumbnail'),
+            'timestamp': unified_timestamp(formatted_metadata.get('video_publish_date')),
        }
-        asset = setup.get('asset') or setup.get('params') or {}
+        asset = try_get(setup, lambda x: x['embed_assets']['chorus'], dict) or {}

        formats = []
        hls_url = asset.get('hls_url')
@ -47,6 +52,7 @@ class VoxMediaVolumeIE(OnceIE):
        if formats:
            self._sort_formats(formats)
            info['formats'] = formats
+            info['duration'] = int_or_none(asset.get('duration'))
            return info

        for provider_video_type in ('ooyala', 'youtube', 'brightcove'):
@ -84,7 +90,7 @@ class VoxMediaIE(InfoExtractor):
    }, {
        # Volume embed, Youtube
        'url': 'http://www.theverge.com/2014/10/21/7025853/google-nexus-6-hands-on-photos-video-android-phablet',
-        'md5': '4c8f4a0937752b437c3ebc0ed24802b5',
+        'md5': 'fd19aa0cf3a0eea515d4fd5c8c0e9d68',
        'info_dict': {
            'id': 'Gy8Md3Eky38',
            'ext': 'mp4',
@ -93,6 +99,7 @@ class VoxMediaIE(InfoExtractor):
            'uploader_id': 'TheVerge',
            'upload_date': '20141021',
            'uploader': 'The Verge',
+            'timestamp': 1413907200,
        },
        'add_ie': ['Youtube'],
        'skip': 'similar to the previous test',
@ -100,13 +107,13 @@ class VoxMediaIE(InfoExtractor):
        # Volume embed, Youtube
        'url': 'http://www.vox.com/2016/3/31/11336640/mississippi-lgbt-religious-freedom-bill',
        'info_dict': {
-            'id': 'YCjDnX-Xzhg',
+            'id': '22986359b',
            'ext': 'mp4',
            'title': "Mississippi's laws are so bad that its anti-LGBTQ law isn't needed to allow discrimination",
            'description': 'md5:fc1317922057de31cd74bce91eb1c66c',
-            'uploader_id': 'voxdotcom',
            'upload_date': '20150915',
-            'uploader': 'Vox',
+            'timestamp': 1442332800,
+            'duration': 285,
        },
        'add_ie': ['Youtube'],
        'skip': 'similar to the previous test',
@ -160,6 +167,9 @@ class VoxMediaIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Post-Post-PC CEO: The Full Code Conference Video of Microsoft\'s Satya Nadella',
            'description': 'The longtime veteran was chosen earlier this year as the software giant\'s third leader in its history.',
+            'timestamp': 1402938000,
+            'upload_date': '20140616',
+            'duration': 4114,
        },
        'add_ie': ['VoxMediaVolume'],
    }]