From 21633673c33f082c6673bc245e4a90d880729a58 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 18 May 2022 09:04:30 +0530 Subject: [PATCH] [cleanup] Minor fixes --- Changelog.md | 8 ++++---- devscripts/make_lazy_extractors.py | 2 +- yt_dlp/YoutubeDL.py | 2 +- yt_dlp/__init__.py | 1 + yt_dlp/extractor/common.py | 18 ++++++++++++------ yt_dlp/extractor/fc2.py | 3 +-- yt_dlp/extractor/voicy.py | 6 +++--- yt_dlp/utils.py | 4 +++- 8 files changed, 26 insertions(+), 18 deletions(-) diff --git a/Changelog.md b/Changelog.md index 3fb6260b86..52ea033676 100644 --- a/Changelog.md +++ b/Changelog.md @@ -785,7 +785,7 @@ * [build] Improvements * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev) * Release windows exe built with `py2exe` - * Enable lazy-extractors in releases. + * Enable lazy-extractors in releases * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental) * Clean up error reporting in update * Refactor `pyinst.py`, misc cleanup and improve docs @@ -1393,7 +1393,7 @@ * [youtube] Non-fatal alert reporting for unavailable videos page by [coletdjnz](https://github.com/coletdjnz) * [twitcasting] Websocket support by [nao20010128nao](https://github.com/nao20010128nao) * [mediasite] Extract slides by [fstirlitz](https://github.com/fstirlitz) -* [funimation] Extract subtitles +* [funimation] Extract subtitles * [pornhub] Extract `cast` * [hotstar] Use server time for authentication instead of local time * [EmbedThumbnail] Fix for already downloaded thumbnail @@ -1489,7 +1489,7 @@ ### 2021.05.20 -* **Youtube improvements**: +* **Youtube improvements**: * Support youtube music `MP`, `VL` and `browse` pages * Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan) * Extract multiple subtitles in same language by [pukkandan](https://github.com/pukkandan) and 
[tpikonen](https://github.com/tpikonen) @@ -2031,7 +2031,7 @@ * **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details * **Format Selection:** See [Format Selection](README.md#format-selection) for details * New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*` - * Changed video format sorting to show video only files and video+audio files together. + * Changed video format sorting to show video only files and video+audio files together * Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams` * Added `b`,`w`,`v`,`a` as alias for `best`, `worst`, `video` and `audio` respectively * Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details diff --git a/devscripts/make_lazy_extractors.py b/devscripts/make_lazy_extractors.py index 8c481bc2d4..cd1985c8ee 100644 --- a/devscripts/make_lazy_extractors.py +++ b/devscripts/make_lazy_extractors.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import os import optparse +import os import sys from inspect import getsource diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 31624f181a..ba08f6a7d3 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1924,7 +1924,7 @@ class YoutubeDL: and download and ( not can_merge() - or info_dict.get('is_live', False) + or info_dict.get('is_live') and not self.params.get('live_from_start') or self.outtmpl_dict['default'] == '-')) compat = ( prefer_best diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 0a8bf37b65..8f890b34a6 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -869,6 +869,7 @@ def main(argv=None): from .extractor import gen_extractors, list_extractors + __all__ = [ 'main', 'YoutubeDL', diff --git 
a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index ebeca43951..6a451c20bb 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1343,7 +1343,7 @@ class InfoExtractor: return self._og_search_property('url', html, **kargs) def _html_extract_title(self, html, name='title', *, fatal=False, **kwargs): - return self._html_search_regex(r'(?s)<title>([^<]+)</title>', html, name, fatal=fatal, **kwargs) + return self._html_search_regex(r'(?s)<title\b[^>]*>([^<]+)</title>', html, name, fatal=fatal, **kwargs) def _html_search_meta(self, name, html, display_name=None, fatal=False, **kwargs): name = variadic(name) @@ -1509,8 +1509,9 @@ class InfoExtractor: 'url': url_or_none(e.get('contentUrl')), 'title': unescapeHTML(e.get('name')), 'description': unescapeHTML(e.get('description')), - 'thumbnails': [{'url': url_or_none(url)} - for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))], + 'thumbnails': [{'url': url} + for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL')) + if url_or_none(url)], 'duration': parse_duration(e.get('duration')), 'timestamp': unified_timestamp(e.get('uploadDate')), # author can be an instance of 'Organization' or 'Person' types. 
@@ -2803,13 +2804,18 @@ class InfoExtractor: mime_type = representation_attrib['mimeType'] content_type = representation_attrib.get('contentType', mime_type.split('/')[0]) - codecs = parse_codecs(representation_attrib.get('codecs', '')) + codec_str = representation_attrib.get('codecs', '') + # Some kind of binary subtitle found in some youtube livestreams + if mime_type == 'application/x-rawcc': + codecs = {'scodec': codec_str} + else: + codecs = parse_codecs(codec_str) if content_type not in ('video', 'audio', 'text'): if mime_type == 'image/jpeg': content_type = mime_type - elif codecs['vcodec'] != 'none': + elif codecs.get('vcodec', 'none') != 'none': content_type = 'video' - elif codecs['acodec'] != 'none': + elif codecs.get('acodec', 'none') != 'none': content_type = 'audio' elif codecs.get('scodec', 'none') != 'none': content_type = 'text' diff --git a/yt_dlp/extractor/fc2.py b/yt_dlp/extractor/fc2.py index 225677b00e..54b136ec7d 100644 --- a/yt_dlp/extractor/fc2.py +++ b/yt_dlp/extractor/fc2.py @@ -10,7 +10,6 @@ from ..utils import ( WebSocketsWrapper, js_to_json, sanitized_Request, - std_headers, traverse_obj, update_url_query, urlencode_postdata, @@ -207,7 +206,7 @@ class FC2LiveIE(InfoExtractor): 'Cookie': str(self._get_cookies('https://live.fc2.com/'))[12:], 'Origin': 'https://live.fc2.com', 'Accept': '*/*', - 'User-Agent': std_headers['User-Agent'], + 'User-Agent': self.get_param('http_headers')['User-Agent'], }) self.write_debug('[debug] Sending HLS server request') diff --git a/yt_dlp/extractor/voicy.py b/yt_dlp/extractor/voicy.py index e4570a03ae..feab79138a 100644 --- a/yt_dlp/extractor/voicy.py +++ b/yt_dlp/extractor/voicy.py @@ -1,3 +1,5 @@ +import itertools + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( @@ -9,8 +11,6 @@ from ..utils import ( unsmuggle_url, ) -import itertools - class VoicyBaseIE(InfoExtractor): def _extract_from_playlist_data(self, value): @@ -105,7 +105,7 @@ class 
VoicyChannelIE(VoicyBaseIE): @classmethod def suitable(cls, url): - return not VoicyIE.suitable(url) and super(VoicyChannelIE, cls).suitable(url) + return not VoicyIE.suitable(url) and super().suitable(url) def _entries(self, channel_id): pager = '' diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 3b0e6750c3..bcdb7d55b6 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -714,7 +714,9 @@ def sanitize_path(s, force=False): def sanitize_url(url): # Prepend protocol-less URLs with `http:` scheme in order to mitigate # the number of unwanted failures due to missing protocol - if url.startswith('//'): + if url is None: + return + elif url.startswith('//'): return 'http:%s' % url # Fix some common typos seen so far COMMON_TYPOS = (