From 637570326bfa12575fe210e52e2a39d6585891d8 Mon Sep 17 00:00:00 2001
From: Antti Ajanki <>
Date: Sun, 22 Feb 2015 10:16:51 +0200
Subject: [PATCH 1/5] [extractor/common] Extract the first of a seq of videos
 in a .smil file

 youtube_dl/extractor/ | 68 +++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 29 deletions(-)

diff --git a/youtube_dl/extractor/ b/youtube_dl/extractor/
index 87fce9cd89..4fe99d25de 100644
--- a/youtube_dl/extractor/
+++ b/youtube_dl/extractor/
@@ -921,39 +921,49 @@ class InfoExtractor(object):
         formats = []
         rtmp_count = 0
-        for video in smil.findall('./body/switch/video'):
-            src = video.get('src')
-            if not src:
-                continue
-            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
-            width = int_or_none(video.get('width'))
-            height = int_or_none(video.get('height'))
-            proto = video.get('proto')
-            if not proto:
-                if base:
-                    if base.startswith('rtmp'):
-                        proto = 'rtmp'
-                    elif base.startswith('http'):
-                        proto = 'http'
-            ext = video.get('ext')
-            if proto == 'm3u8':
-                formats.extend(self._extract_m3u8_formats(src, video_id, ext))
-            elif proto == 'rtmp':
-                rtmp_count += 1
-                streamer = video.get('streamer') or base
-                formats.append({
-                    'url': streamer,
-                    'play_path': src,
-                    'ext': 'flv',
-                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
-                    'tbr': bitrate,
-                    'width': width,
-                    'height': height,
-                })
+        if smil.findall('./body/seq/video'):
+            video = smil.findall('./body/seq/video')[0]
+            fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count)
+            formats.extend(fmts)
+        else:
+            for video in smil.findall('./body/switch/video'):
+                fmts, rtmp_count = self._parse_smil_video(video, base, rtmp_count)
+                formats.extend(fmts)
         return formats
+    def _parse_smil_video(self, video, base, rtmp_count, video_id=None):
+        """ Convert one SMIL <video> element into a list of formats
+
+        video      -- element whose attributes ('src', 'proto', 'ext',
+                      'streamer', 'system-bitrate'/'systemBitrate',
+                      'width', 'height') are read
+        base       -- base URL; its 'rtmp'/'http' prefix selects the
+                      protocol when the element has no 'proto'
+                      attribute, and it is the RTMP streamer fallback
+        rtmp_count -- number of RTMP formats counted so far
+        video_id   -- forwarded to _extract_m3u8_formats; defaults to
+                      None so callers passing three arguments keep
+                      working (the name used to be undefined in this
+                      method, so the m3u8 branch raised NameError)
+
+        Returns a (formats, rtmp_count) tuple; formats is empty when
+        the element has no 'src'.
+
+        NOTE(review): there is no fallback return after the protocol
+        branches, so a protocol matching none of them does not yield a
+        (formats, rtmp_count) tuple -- confirm callers never hit this.
+        """
+        src = video.get('src')
+        if not src:
+            return ([], rtmp_count)
+        # Both attribute spellings are accepted for the bitrate
+        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
+        width = int_or_none(video.get('width'))
+        height = int_or_none(video.get('height'))
+        proto = video.get('proto')
+        if not proto and base:
+            # No explicit protocol: fall back to the scheme of the base URL
+            if base.startswith('rtmp'):
+                proto = 'rtmp'
+            elif base.startswith('http'):
+                proto = 'http'
+        ext = video.get('ext')
+        if proto == 'm3u8':
+            return (self._extract_m3u8_formats(src, video_id, ext), rtmp_count)
+        elif proto == 'rtmp':
+            rtmp_count += 1
+            streamer = video.get('streamer') or base
+            return ([{
+                'url': streamer,
+                'play_path': src,
+                'ext': 'flv',
+                # Prefer the bitrate as identifier, else the running count
+                'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
+                'tbr': bitrate,
+                'width': width,
+                'height': height,
+            }], rtmp_count)
     def _live_title(self, name):
         """ Generate the title for a live video """
         now =

From 6f4ba54079893a09c6aa78fe3420523fb96df858 Mon Sep 17 00:00:00 2001
From: Antti Ajanki <>
Date: Sun, 22 Feb 2015 10:18:36 +0200
Subject: [PATCH 2/5] [extractor/common] Extract HTTP (possibly f4m) URLs from
 a .smil file

 youtube_dl/extractor/ | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/youtube_dl/extractor/ b/youtube_dl/extractor/
index 4fe99d25de..3136882083 100644
--- a/youtube_dl/extractor/
+++ b/youtube_dl/extractor/
@@ -963,6 +963,14 @@ class InfoExtractor(object):
                 'width': width,
                 'height': height,
             }], rtmp_count)
+        elif proto.startswith('http'):
+            return ([{
+                'url': base + src,
+                'ext': ext or 'flv',
+                'tbr': bitrate,
+                'width': width,
+                'height': height,
+            }], rtmp_count)
     def _live_title(self, name):
         """ Generate the title for a live video """

From c4f8c453ae2f735fc2320856e15e66510d74fd72 Mon Sep 17 00:00:00 2001
From: Antti Ajanki <>
Date: Sun, 22 Feb 2015 21:03:49 +0200
Subject: [PATCH 3/5] [f4m] Refresh fragment list periodically on live streams

 youtube_dl/downloader/ | 59 ++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/youtube_dl/downloader/ b/youtube_dl/downloader/
index 7b8fe8cf57..1df9ebe5b2 100644
--- a/youtube_dl/downloader/
+++ b/youtube_dl/downloader/
@@ -121,7 +121,8 @@ class FlvReader(io.BytesIO):
         self.read_unsigned_int()  # BootstrapinfoVersion
         # Profile,Live,Update,Reserved
+        flags = self.read_unsigned_char()
+        live = flags & 0x20 != 0
         # time scale
         # CurrentMediaTime
@@ -160,6 +161,7 @@ class FlvReader(io.BytesIO):
         return {
             'segments': segments,
             'fragments': fragments,
+            'live': live,
     def read_bootstrap_info(self):
@@ -182,6 +184,10 @@ def build_fragments_list(boot_info):
     for segment, fragments_count in segment_run_table['segment_run']:
         for _ in range(fragments_count):
             res.append((segment, next(fragments_counter)))
+    if boot_info['live']:
+        res = res[-2:]
     return res
@@ -246,6 +252,38 @@ class F4mFD(FileDownloader):
             self.report_error('Unsupported DRM')
         return media
+    def _get_bootstrap_from_url(self, bootstrap_url):
+        """ Fetch bootstrap_url and return its parsed bootstrap info """
+        response = self.ydl.urlopen(bootstrap_url)
+        raw_bootstrap = response.read()
+        return read_bootstrap_info(raw_bootstrap)
+    def _update_live_fragments(self, bootstrap_url, latest_fragment):
+        """ Poll the bootstrap for fragments newer than latest_fragment
+
+        The bootstrap is re-read up to 30 times, sleeping 5 seconds
+        after every attempt that yields nothing new.  Returns the list
+        of (segment, fragment) pairs whose fragment number is greater
+        than latest_fragment; if every attempt comes back empty an
+        error is reported and an empty list is returned.
+        """
+        for _ in range(30):
+            boot_info = self._get_bootstrap_from_url(bootstrap_url)
+            newer = [
+                frag for frag in build_fragments_list(boot_info)
+                if frag[1] > latest_fragment]
+            if newer:
+                return newer
+            # Nothing new yet, retry after a while
+            time.sleep(5.0)
+        self.report_error('Failed to update fragments')
+        return []
+    def _parse_bootstrap_node(self, node, base_url):
+        """ Return (boot_info, bootstrap_url) for a bootstrapInfo node
+
+        A node carrying text holds the bootstrap inline as base64, in
+        which case bootstrap_url is None.  Otherwise the node's 'url'
+        attribute is resolved against base_url and downloaded.
+        """
+        if node.text is not None:
+            inline_bootstrap = base64.b64decode(node.text)
+            return (read_bootstrap_info(inline_bootstrap), None)
+        bootstrap_url = compat_urlparse.urljoin(base_url, node.attrib['url'])
+        return (self._get_bootstrap_from_url(bootstrap_url), bootstrap_url)
     def real_download(self, filename, info_dict):
         man_url = info_dict['url']
         requested_bitrate = info_dict.get('tbr')
@@ -265,18 +303,13 @@ class F4mFD(FileDownloader):
         base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
         bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
-        if bootstrap_node.text is None:
-            bootstrap_url = compat_urlparse.urljoin(
-                base_url, bootstrap_node.attrib['url'])
-            bootstrap = self.ydl.urlopen(bootstrap_url).read()
-        else:
-            bootstrap = base64.b64decode(bootstrap_node.text)
+        boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+        live = boot_info['live']
         metadata_node = media.find(_add_ns('metadata'))
         if metadata_node is not None:
             metadata = base64.b64decode(metadata_node.text)
             metadata = None
-        boot_info = read_bootstrap_info(bootstrap)
         fragments_list = build_fragments_list(boot_info)
         if self.params.get('test', False):
@@ -301,7 +334,8 @@ class F4mFD(FileDownloader):
         (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
-        write_metadata_tag(dest_stream, metadata)
+        if not live:
+            write_metadata_tag(dest_stream, metadata)
         # This dict stores the download progress, it's updated by the progress
         # hook
@@ -348,7 +382,8 @@ class F4mFD(FileDownloader):
         frags_filenames = []
-        for (seg_i, frag_i) in fragments_list:
+        while fragments_list:
+            seg_i, frag_i = fragments_list.pop(0)
             name = 'Seg%d-Frag%d' % (seg_i, frag_i)
             url = base_url + name
             if akamai_pv:
@@ -367,6 +402,10 @@ class F4mFD(FileDownloader):
+            if not fragments_list and live and bootstrap_url:
+                fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+                self.to_screen('Updated available fragments: %d' % len(fragments_list))
         elapsed = time.time() - start

From 5eaaeb7c317a543af4bde5eb7d465f3695fc97d9 Mon Sep 17 00:00:00 2001
From: Antti Ajanki <>
Date: Mon, 23 Feb 2015 21:56:35 +0200
Subject: [PATCH 4/5] [f4m] Tolerate missed fragments on live streams

 youtube_dl/downloader/ | 43 +++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/youtube_dl/downloader/ b/youtube_dl/downloader/
index 1df9ebe5b2..3dc796faaf 100644
--- a/youtube_dl/downloader/
+++ b/youtube_dl/downloader/
@@ -11,6 +11,7 @@ from .common import FileDownloader
 from .http import HttpFD
 from ..compat import (
+    compat_urllib_error,
 from ..utils import (
@@ -389,22 +390,38 @@ class F4mFD(FileDownloader):
             if akamai_pv:
                 url += '?' + akamai_pv.strip(';')
             frag_filename = '%s-%s' % (tmpfilename, name)
-            success =, {'url': url})
-            if not success:
-                return False
-            with open(frag_filename, 'rb') as down:
-                down_data =
-                reader = FlvReader(down_data)
-                while True:
-                    _, box_type, box_data = reader.read_box_info()
-                    if box_type == b'mdat':
-                        dest_stream.write(box_data)
-                        break
-            frags_filenames.append(frag_filename)
+            try:
+                success =, {'url': url})
+                if not success:
+                    return False
+                with open(frag_filename, 'rb') as down:
+                    down_data =
+                    reader = FlvReader(down_data)
+                    while True:
+                        _, box_type, box_data = reader.read_box_info()
+                        if box_type == b'mdat':
+                            dest_stream.write(box_data)
+                            break
+                if live:
+                    os.remove(frag_filename)
+                else:
+                    frags_filenames.append(frag_filename)
+            except (compat_urllib_error.HTTPError, ) as err:
+                if live and (err.code == 404 or err.code == 410):
+                    # We didn't keep up with the live window. Continue
+                    # with the next available fragment.
+                    msg = 'Fragment %d unavailable' % frag_i
+                    self.report_warning(msg)
+                    fragments_list = []
+                else:
+                    raise
             if not fragments_list and live and bootstrap_url:
                 fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
-                self.to_screen('Updated available fragments: %d' % len(fragments_list))
+                total_frags += len(fragments_list)
+                if fragments_list and (fragments_list[0][1] > frag_i + 1):
+                    msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+                    self.report_warning(msg)

From b8988b63a6baa206b7f5e35d99a9f4eff6ec7b5e Mon Sep 17 00:00:00 2001
From: Antti Ajanki <>
Date: Tue, 24 Feb 2015 21:23:59 +0200
Subject: [PATCH 5/5] [wdr] Download a live stream

 youtube_dl/extractor/ | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/youtube_dl/extractor/ b/youtube_dl/extractor/
index c904885006..b468023060 100644
--- a/youtube_dl/extractor/
+++ b/youtube_dl/extractor/
@@ -28,6 +28,7 @@ class WDRIE(InfoExtractor):
                 'title': 'Servicezeit',
                 'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
                 'upload_date': '20140310',
+                'is_live': False
             'params': {
                 'skip_download': True,
@@ -41,6 +42,7 @@ class WDRIE(InfoExtractor):
                 'title': 'Marga Spiegel ist tot',
                 'description': 'md5:2309992a6716c347891c045be50992e4',
                 'upload_date': '20140311',
+                'is_live': False
             'params': {
                 'skip_download': True,
@@ -55,6 +57,7 @@ class WDRIE(InfoExtractor):
                 'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
                 'description': 'md5:2309992a6716c347891c045be50992e4',
                 'upload_date': '20091129',
+                'is_live': False
@@ -66,6 +69,7 @@ class WDRIE(InfoExtractor):
                 'title': 'Flavia Coelho: Amar é Amar',
                 'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                 'upload_date': '20140717',
+                'is_live': False
@@ -74,6 +78,20 @@ class WDRIE(InfoExtractor):
             'info_dict': {
                 'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
+        },
+        {
+            'url': '',
+            'info_dict': {
+                'id': 'mdb-103364',
+                'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
+                'ext': 'flv',
+                'upload_date': '20150212',
+                'is_live': True
+            },
+            'params': {
+                'skip_download': True,
+            },
@@ -119,6 +137,10 @@ class WDRIE(InfoExtractor):
         video_url = flashvars['dslSrc'][0]
         title = flashvars['trackerClipTitle'][0]
         thumbnail = flashvars['startPicture'][0] if 'startPicture' in flashvars else None
+        is_live = flashvars.get('isLive', ['0'])[0] == '1'
+        if is_live:
+            title = self._live_title(title)
         if 'trackerClipAirTime' in flashvars:
             upload_date = flashvars['trackerClipAirTime'][0]
@@ -131,6 +153,13 @@ class WDRIE(InfoExtractor):
         if video_url.endswith('.f4m'):
             video_url += '?hdcore=3.2.0&plugin=aasp-'
             ext = 'flv'
+        elif video_url.endswith('.smil'):
+            fmt = self._extract_smil_formats(video_url, page_id)[0]
+            video_url = fmt['url']
+            sep = '&' if '?' in video_url else '?'
+            video_url += sep
+            video_url += 'hdcore=3.3.0&plugin=aasp-'
+            ext = fmt['ext']
             ext = determine_ext(video_url)
@@ -144,6 +173,7 @@ class WDRIE(InfoExtractor):
             'description': description,
             'thumbnail': thumbnail,
             'upload_date': upload_date,
+            'is_live': is_live