diff --git a/AUTHORS b/AUTHORS index 8362b6d8a6..1596a75482 100644 --- a/AUTHORS +++ b/AUTHORS @@ -105,3 +105,4 @@ Dinesh S Johan K. Jensen Yen Chi Hsuan Enam Mijbah Noor +David Luhmer diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 388c55e991..4782326826 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -14,6 +14,7 @@ import xml.etree.ElementTree from ..compat import ( compat_cookiejar, + compat_HTTPError, compat_http_client, compat_urllib_error, compat_urllib_parse_urlparse, @@ -26,6 +27,7 @@ from ..utils import ( compiled_regex_type, ExtractorError, float_or_none, + HEADRequest, int_or_none, RegexNotFoundError, sanitize_filename, @@ -716,6 +718,27 @@ class InfoExtractor(object): ) formats.sort(key=_formats_key) + def _check_formats(self, formats, video_id): + if formats: + formats[:] = filter( + lambda f: self._is_valid_url( + f['url'], video_id, + item='%s video format' % f.get('format_id') if f.get('format_id') else 'video'), + formats) + + def _is_valid_url(self, url, video_id, item='video'): + try: + self._request_webpage( + HEADRequest(url), video_id, + 'Checking %s URL' % item) + return True + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError): + self.report_warning( + '%s URL is invalid, skipping' % item, video_id) + return False + raise + def http_scheme(self): """ Either "http:" or "https:", depending on the user's preferences """ return ( diff --git a/youtube_dl/extractor/drtv.py b/youtube_dl/extractor/drtv.py index c44adb1099..510ef04b06 100644 --- a/youtube_dl/extractor/drtv.py +++ b/youtube_dl/extractor/drtv.py @@ -48,14 +48,20 @@ class DRTVIE(SubtitlesInfoExtractor): elif asset['Kind'] == 'VideoResource': duration = asset['DurationInMilliseconds'] / 1000.0 restricted_to_denmark = asset['RestrictedToDenmark'] + spoken_subtitles = asset['Target'] == 'SpokenSubtitles' for link in asset['Links']: target = link['Target'] uri = link['Uri'] + format_id = target + preference = -1 if target == 'HDS' else -2 + if spoken_subtitles: + preference -= 2 + format_id += '-spoken-subtitles' formats.append({ 'url': uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43' if target == 'HDS' else uri, - 'format_id': target, + 'format_id': format_id, 'ext': link['FileFormat'], - 'preference': -1 if target == 'HDS' else -2, + 'preference': preference, }) subtitles_list = asset.get('SubtitlesList') if isinstance(subtitles_list, list): diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 26e84970d4..762cefa34e 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -85,6 +85,7 @@ class LyndaIE(SubtitlesInfoExtractor): } for format_id, video_url in prioritized_streams['0'].items() ]) + self._check_formats(formats, video_id) self._sort_formats(formats) if self._downloader.params.get('listsubtitles', False): diff --git a/youtube_dl/extractor/spiegel.py b/youtube_dl/extractor/spiegel.py index f345883c76..b868241d50 100644 --- a/youtube_dl/extractor/spiegel.py +++ b/youtube_dl/extractor/spiegel.py @@ -4,14 +4,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urlparse, - compat_HTTPError, -) -from ..utils import ( - HEADRequest, - ExtractorError, -) +from ..compat import compat_urlparse from .spiegeltv import SpiegeltvIE @@ -72,16 +65,6 @@ class SpiegelIE(InfoExtractor): if n.tag.startswith('type') and n.tag != 'type6': format_id = n.tag.rpartition('type')[2] video_url = base_url + n.find('./filename').text - # Test video URLs beforehand as some of them are invalid - try: - self._request_webpage( - HEADRequest(video_url), video_id, - 'Checking %s video URL' % format_id) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404: - self.report_warning( - '%s video URL is invalid, skipping' % format_id, video_id) - continue formats.append({ 'format_id': format_id, 'url': video_url, @@ -94,6 +77,7 @@ class SpiegelIE(InfoExtractor): }) duration = float(idoc[0].findall('./duration')[0].text) + self._check_formats(formats, video_id) self._sort_formats(formats) return {