From a0bb6ce58db5b3124962037ca12e78cbd348f56c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 31 Oct 2021 13:26:44 +0530 Subject: [PATCH] [youtube] refactor itag processing --- yt_dlp/extractor/youtube.py | 56 +++++++++++++++---------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 56cd2ed8d3..64475edec0 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2434,7 +2434,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return prs, player_url def _extract_formats(self, streaming_data, video_id, player_url, is_live): - itags, stream_ids = [], [] + itags, stream_ids = {}, [] itag_qualities, res_qualities = {}, {} q = qualities([ # Normally tiny is the smallest video-only formats. But @@ -2498,7 +2498,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): throttled = True if itag: - itags.append(itag) + itags[itag] = 'https' stream_ids.append(stream_id) tbr = float_or_none( @@ -2548,46 +2548,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor): and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True)) get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True) - def guess_quality(f): - for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)): - if val in qdict: - return q(qdict[val]) - return -1 + def process_manifest_format(f, proto, itag): + if itag in itags: + if itags[itag] == proto or f'{itag}-{proto}' in itags: + return False + itag = f'{itag}-{proto}' + if itag: + f['format_id'] = itag + itags[itag] = proto + + f['quality'] = next(( + q(qdict[val]) + for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)) + if val in qdict), -1) + return True for sd in streaming_data: hls_manifest_url = get_hls and sd.get('hlsManifestUrl') if hls_manifest_url: for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False): - itag = self._search_regex( - r'/itag/(\d+)', f['url'], 'itag', default=None) - if itag in itags: - itag += '-hls' - if itag in itags: - continue - if itag: - f['format_id'] = itag - itags.append(itag) - f['quality'] = guess_quality(f) - yield f + if process_manifest_format(f, 'hls', self._search_regex( + r'/itag/(\d+)', f['url'], 'itag', default=None)): + yield f dash_manifest_url = get_dash and sd.get('dashManifestUrl') if dash_manifest_url: for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False): - itag = f['format_id'] - if itag in itags: - itag += '-dash' - if itag in itags: - continue - if itag: - f['format_id'] = itag - itags.append(itag) - f['quality'] = guess_quality(f) - filesize = int_or_none(self._search_regex( - r'/clen/(\d+)', f.get('fragment_base_url') - or f['url'], 'file size', default=None)) - if filesize: - f['filesize'] = filesize - yield f + if process_manifest_format(f, 'dash', f['format_id']): + f['filesize'] = int_or_none(self._search_regex( + r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None)) + yield f def _real_extract(self, url): url, smuggled_data = unsmuggle_url(url, {})