Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2024-11-10 07:04:38 +00:00)
[ie] Support multi-period MPD streams (#6654)
Commit 4ce57d3b87 (parent ffff1bc659)
2 changed files with 54 additions and 14 deletions
yt_dlp/YoutubeDL.py

@@ -3483,7 +3483,8 @@ class YoutubeDL:
                          or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None,
                          'Possible MPEG-TS in MP4 container or malformed AAC timestamps',
                          FFmpegFixupM3u8PP)
-            ffmpeg_fixup(info_dict.get('is_live') and downloader == 'dashsegments',
+            ffmpeg_fixup(downloader == 'dashsegments'
+                         and (info_dict.get('is_live') or info_dict.get('is_dash_periods')),
                          'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP)

             ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP)
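The change above widens the condition for the duplicate-MOOV fixup: it now also fires for DASH downloads whose fragments were stitched together from several periods, not only for live streams. A minimal standalone sketch of that decision follows; the helper name is made up for illustration and is not part of yt-dlp.

def needs_duplicate_moov_fixup(downloader, info_dict):
    # Mirrors the new condition: dashsegments downloads that are either live
    # or merged from multiple DASH periods may contain repeated MOOV atoms.
    return bool(downloader == 'dashsegments'
                and (info_dict.get('is_live') or info_dict.get('is_dash_periods')))

assert needs_duplicate_moov_fixup('dashsegments', {'is_dash_periods': True})
assert needs_duplicate_moov_fixup('dashsegments', {'is_live': True})
assert not needs_duplicate_moov_fixup('dashsegments', {})
assert not needs_duplicate_moov_fixup('hlsnative', {'is_dash_periods': True})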
yt_dlp/extractor/common.py

@@ -247,6 +247,8 @@ class InfoExtractor:
                                        (For internal use only)
                    * http_chunk_size Chunk size for HTTP downloads
                    * ffmpeg_args     Extra arguments for ffmpeg downloader
+                    * is_dash_periods  Whether the format is a result of merging
+                                      multiple DASH periods.
                    RTMP formats can also have the additional fields: page_url,
                    app, play_path, tc_url, flash_version, rtmp_live, rtmp_conn,
                    rtmp_protocol, rtmp_real_time
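For reference, a purely illustrative format dict carrying the new internal flag after period merging; every concrete value below is invented.

merged_format = {
    'format_id': 'dash-video-1',        # invented id, for illustration only
    'ext': 'mp4',
    'protocol': 'http_dash_segments',
    'fragments': [],                    # fragments concatenated across periods
    'is_dash_periods': True,            # set by the merge step (internal use only)
}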
@@ -2530,7 +2532,11 @@ class InfoExtractor:
             self._report_ignoring_subs('DASH')
         return fmts

-    def _extract_mpd_formats_and_subtitles(
+    def _extract_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._extract_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _extract_mpd_periods(
             self, mpd_url, video_id, mpd_id=None, note=None, errnote=None,
             fatal=True, data=None, headers={}, query={}):
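Existing extractors keep calling the public helper with the same signature and return shape; the new period handling happens behind it. A hypothetical extractor sketch (class name, URL pattern and manifest URL are assumptions, not taken from this commit):

from yt_dlp.extractor.common import InfoExtractor


class ExampleDashIE(InfoExtractor):
    _VALID_URL = r'https?://media\.example\.com/watch/(?P<id>\w+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Same call as before this commit; multi-period manifests are now
        # merged transparently by _extract_mpd_periods/_merge_mpd_periods.
        formats, subtitles = self._extract_mpd_formats_and_subtitles(
            f'https://media.example.com/{video_id}/manifest.mpd', video_id, mpd_id='dash')
        return {
            'id': video_id,
            'title': video_id,
            'formats': formats,
            'subtitles': subtitles,
        }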
@@ -2543,17 +2549,16 @@ class InfoExtractor:
             errnote='Failed to download MPD manifest' if errnote is None else errnote,
             fatal=fatal, data=data, headers=headers, query=query)
         if res is False:
-            return [], {}
+            return []
         mpd_doc, urlh = res
         if mpd_doc is None:
-            return [], {}
+            return []

         # We could have been redirected to a new url when we retrieved our mpd file.
         mpd_url = urlh.url
         mpd_base_url = base_url(mpd_url)

-        return self._parse_mpd_formats_and_subtitles(
-            mpd_doc, mpd_id, mpd_base_url, mpd_url)
+        return self._parse_mpd_periods(mpd_doc, mpd_id, mpd_base_url, mpd_url)

     def _parse_mpd_formats(self, *args, **kwargs):
         fmts, subs = self._parse_mpd_formats_and_subtitles(*args, **kwargs)
@@ -2561,8 +2566,39 @@ class InfoExtractor:
             self._report_ignoring_subs('DASH')
         return fmts

-    def _parse_mpd_formats_and_subtitles(
-            self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
+    def _parse_mpd_formats_and_subtitles(self, *args, **kwargs):
+        periods = self._parse_mpd_periods(*args, **kwargs)
+        return self._merge_mpd_periods(periods)
+
+    def _merge_mpd_periods(self, periods):
+        """
+        Combine all formats and subtitles from an MPD manifest into a single list,
+        by concatenate streams with similar formats.
+        """
+        formats, subtitles = {}, {}
+        for period in periods:
+            for f in period['formats']:
+                assert 'is_dash_periods' not in f, 'format already processed'
+                f['is_dash_periods'] = True
+                format_key = tuple(v for k, v in f.items() if k not in (
+                    ('format_id', 'fragments', 'manifest_stream_number')))
+                if format_key not in formats:
+                    formats[format_key] = f
+                elif 'fragments' in f:
+                    formats[format_key].setdefault('fragments', []).extend(f['fragments'])
+
+            if subtitles and period['subtitles']:
+                self.report_warning(bug_reports_message(
+                    'Found subtitles in multiple periods in the DASH manifest; '
+                    'if part of the subtitles are missing,'
+                ), only_once=True)
+
+            for sub_lang, sub_info in period['subtitles'].items():
+                subtitles.setdefault(sub_lang, []).extend(sub_info)
+
+        return list(formats.values()), subtitles
+
+    def _parse_mpd_periods(self, mpd_doc, mpd_id=None, mpd_base_url='', mpd_url=None):
         """
         Parse formats from MPD manifest.
         References:
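The merge step treats formats from different periods as one stream when they are identical apart from 'format_id', 'fragments' and 'manifest_stream_number', and concatenates their fragment lists; with no periods at all it still yields ([], {}), which is why the download helper above can simply return [] on failure. A self-contained sketch of that idea on toy data (values are invented; the real method also guards against double processing and warns about subtitles found in multiple periods):

def merge_periods(periods):
    formats, subtitles = {}, {}
    for period in periods:
        for f in period['formats']:
            f['is_dash_periods'] = True
            # Key on every field except the ones that legitimately differ
            # between periods of the same stream.
            key = tuple(v for k, v in f.items()
                        if k not in ('format_id', 'fragments', 'manifest_stream_number'))
            if key not in formats:
                formats[key] = f
            elif 'fragments' in f:
                formats[key].setdefault('fragments', []).extend(f['fragments'])
        for lang, sub_info in period['subtitles'].items():
            subtitles.setdefault(lang, []).extend(sub_info)
    return list(formats.values()), subtitles


periods = [
    {'formats': [{'format_id': 'p0-video', 'height': 720,
                  'fragments': [{'url': 'seg-p0-1.m4s'}]}], 'subtitles': {}},
    {'formats': [{'format_id': 'p1-video', 'height': 720,
                  'fragments': [{'url': 'seg-p1-1.m4s'}]}], 'subtitles': {}},
]
merged, subs = merge_periods(periods)
assert len(merged) == 1 and len(merged[0]['fragments']) == 2
assert merge_periods([]) == ([], {})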
@@ -2641,9 +2677,13 @@ class InfoExtractor:
             return ms_info

         mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
-        formats, subtitles = [], {}
         stream_numbers = collections.defaultdict(int)
-        for period in mpd_doc.findall(_add_ns('Period')):
+        for period_idx, period in enumerate(mpd_doc.findall(_add_ns('Period'))):
+            period_entry = {
+                'id': period.get('id', f'period-{period_idx}'),
+                'formats': [],
+                'subtitles': collections.defaultdict(list),
+            }
             period_duration = parse_duration(period.get('duration')) or mpd_duration
             period_ms_info = extract_multisegment_info(period, {
                 'start_number': 1,
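Instead of appending everything to a single formats list, the parser now yields one entry per <Period> element, each with its own id, formats and subtitles. A standalone, simplified sketch of that per-period iteration over a toy manifest (the real code of course also parses adaptation sets, representations and segments):

import collections
import xml.etree.ElementTree as ET

TOY_MPD = '''<?xml version="1.0"?>
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" mediaPresentationDuration="PT60S">
  <Period id="ad-break"/>
  <Period/>
</MPD>'''

_MPD_NS = '{urn:mpeg:dash:schema:mpd:2011}'


def parse_periods(mpd_doc):
    # One entry per <Period>, falling back to a synthetic id when missing.
    for period_idx, period in enumerate(mpd_doc.findall(f'{_MPD_NS}Period')):
        yield {
            'id': period.get('id', f'period-{period_idx}'),
            'formats': [],
            'subtitles': collections.defaultdict(list),
        }


doc = ET.fromstring(TOY_MPD)
print([p['id'] for p in parse_periods(doc)])  # ['ad-break', 'period-1']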
@@ -2893,11 +2933,10 @@ class InfoExtractor:
                 if content_type in ('video', 'audio', 'image/jpeg'):
                     f['manifest_stream_number'] = stream_numbers[f['url']]
                     stream_numbers[f['url']] += 1
-                    formats.append(f)
+                    period_entry['formats'].append(f)
                 elif content_type == 'text':
-                    subtitles.setdefault(lang or 'und', []).append(f)
-        return formats, subtitles
+                    period_entry['subtitles'][lang or 'und'].append(f)
+            yield period_entry

     def _extract_ism_formats(self, *args, **kwargs):
         fmts, subs = self._extract_ism_formats_and_subtitles(*args, **kwargs)