yt-dlp/yt_dlp/utils/_utils.py

import base64
import binascii
import calendar
import codecs
import collections
import collections.abc
import contextlib
import datetime as dt
import email.header
import email.utils
import errno
import hashlib
import hmac
import html.entities
import html.parser
import inspect
import io
import itertools
import json
import locale
import math
import mimetypes
import netrc
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
import unicodedata
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree

from . import traversal

from ..compat import functools  # isort: split
from ..compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
    compat_os_name,
)
from ..dependencies import xattr

# Drop the last dotted component of this module's name so that it reports
# itself under the name of its parent package
__name__ = __name__.rsplit('.', 1)[0]  # noqa: A001: Pretend to be the parent module

# This is not clearly defined otherwise
# (the concrete type of the objects returned by re.compile)
compiled_regex_type = type(re.compile(''))


class NO_DEFAULT:
    """Sentinel meaning "no default value was supplied" (distinct from None)"""


def IDENTITY(x):
    """Return the argument unchanged"""
    return x


# English month names in calendar order (index 0 is January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names in calendar order, keyed by language code
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}

# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
# Maps timezone abbreviations to their offset from UTC
# NOTE(review): the values look like whole hours — confirm against the code that consumes them
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7,   # Pacific
}

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement. The replacements are
# chained from strings (one replacement per character) and one-element lists
# (a single multi-letter replacement such as 'AE'), then zipped with the keys.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# Date/time format strings that do not depend on whether the day or the month
# comes first; they form the common prefix of DATE_FORMATS_DAY_FIRST and
# DATE_FORMATS_MONTH_FIRST.
# NOTE(review): presumably tried in order by the date parsing helpers — confirm before reordering
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# The common formats followed by the ambiguous numeric ones, read as day-month-year
DATE_FORMATS_DAY_FIRST = [
    *DATE_FORMATS,
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
    '%H:%M %d/%m/%Y',
]

# The common formats followed by the ambiguous numeric ones, read as month-day-year
DATE_FORMATS_MONTH_FIRST = [
    *DATE_FORMATS,
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]

# Matches the tail `}('<payload>',<int>,<int>,'<a|b|c>'.split('|')` of a function call and
# captures the quoted payload, the two integers and the '|'-separated word list.
# NOTE(review): presumably targets "packed" (obfuscated) JavaScript — confirm against the unpacking code
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element; the JSON object or array
# it contains is captured in the named group `json_ld`
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

# Matches an unsigned integer or decimal number (e.g. `12` or `12.5`)
NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Return the encoding to use for the system.

    This is locale.getpreferredencoding() when that names an encoding that
    can actually be used, and 'UTF-8' otherwise. The result is cached.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Make sure the reported encoding is really usable
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    else:
        return encoding


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible

    The JSON is first written to a temporary file next to fn, which then
    replaces fn in a single step. If anything fails, the temporary file is
    removed and the exception is re-raised; an existing fn is left untouched.
    """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        with contextlib.suppress(OSError):
            # Temporary files are created with restrictive permissions;
            # apply the ones a regular new file would get under the current umask.
            # os.umask can only be read by setting it, hence the set-and-restore
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        # Unlike os.rename, os.replace overwrites an existing destination on
        # every platform, so the old file never has to be deleted beforehand
        os.replace(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val]

    @param node   Element (or tree) to search in
    @param xpath  Path of the candidate elements
    @param key    Attribute name; only letters, `_` and `-` are accepted
    @param val    Required attribute value. If None, the attribute only has to exist
    @returns      The first matching element, or None
    """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    has_attr_expr = f'{xpath}[@{key}]'
    if val is None:
        return node.find(has_attr_expr)
    # Compare the value directly instead of embedding it in the predicate:
    # a value containing a quote cannot be written as `[@key='val']`
    wanted = f'{val}'
    return next((el for el in node.iterfind(has_attr_expr) if el.get(key) == wanted), None)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    """Expand the `prefix:tag` steps of path into `{namespace}tag` form.

    @param path    '/'-separated path whose steps may carry a namespace prefix
    @param ns_map  Mapping of prefix to namespace URI
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return f'{{{ns_map[prefix]}}}{tag}'

    return '/'.join(map(_expand, path.split('/')))


def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of node matched by xpath.

    @param xpath    A single path, or an iterable of paths which are tried in order
    @param name     Name to use for the element in the error message (defaults to xpath)
    @param fatal    Raise ExtractorError if nothing matches and no default is given
    @param default  Value to return if nothing matches; takes precedence over fatal
    @returns        The element, or default/None if nothing matches
    """
    candidates = [xpath] if isinstance(xpath, str) else xpath

    # Start from None so that an empty iterable of paths counts as "not found"
    n = None
    for xp in candidates:
        n = node.find(xp)
        if n is not None:
            break

    if n is not None:
        return n
    if default is not NO_DEFAULT:
        return default
    if fatal:
        name = xpath if name is None else name
        raise ExtractorError(f'Could not find XML element {name}')
    return None


def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the text of the first element of node matched by xpath.

    Takes the same arguments as xpath_element. An element without text is
    handled like a missing element: default is returned if given, else
    ExtractorError is raised if fatal is set, else None is returned.
    """
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n

    text = n.text
    if text is not None:
        return text
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    label = xpath if name is None else name
    raise ExtractorError(f'Could not find XML element\'s text {label}')


def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute key of the first element at xpath that has it.

    If there is no such element, default is returned if given, else
    ExtractorError is raised if fatal is set, else None is returned.
    """
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if not fatal:
        return None
    label = f'{xpath}[@{key}]' if name is None else name
    raise ExtractorError(f'Could not find XML attribute {label}')


def get_element_by_id(id, html, **kwargs):
    """Return the content of the first tag with the specified ID in the passed HTML document, or None"""
    attribute = 'id'
    return get_element_by_attribute(attribute, id, html, **kwargs)


def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the first tag with the specified ID in the passed HTML document, or None"""
    attribute = 'id'
    return get_element_html_by_attribute(attribute, id, html, **kwargs)


def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document, or None"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]


def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document, or None"""
    matches = get_elements_html_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]


def get_element_by_attribute(attribute, value, html, **kwargs):
    """Return the content of the first tag with the specified attribute value, or None"""
    matches = get_elements_by_attribute(attribute, value, html, **kwargs)
    if not matches:
        return None
    return matches[0]


def get_element_html_by_attribute(attribute, value, html, **kargs):
    """Return the html of the first tag with the specified attribute value, or None"""
    matches = get_elements_html_by_attribute(attribute, value, html, **kargs)
    if not matches:
        return None
    return matches[0]


def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list

    Extra keyword arguments (e.g. `tag`) are passed on to get_elements_by_attribute
    """
    # The value below is already a regular expression, so it must never be escaped
    kargs['escape_value'] = False
    return get_elements_by_attribute(
        'class', rf'[^\'"]*(?<=[\'"\s]){re.escape(class_name)}(?=[\'"\s])[^\'"]*',
        html, **kargs)


def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    # Matches an attribute value that contains class_name as a whole,
    # quote- or whitespace-delimited word
    class_value_re = rf'[^\'"]*(?<=[\'"\s]){re.escape(class_name)}(?=[\'"\s])[^\'"]*'
    return get_elements_html_by_attribute('class', class_value_re, html, escape_value=False)


def get_elements_by_attribute(*args, **kwargs):
    """Return the content of all tags with the specified attribute in the passed HTML document as a list

    Takes the same arguments as get_elements_text_and_html_by_attribute
    """
    return [pair[0] for pair in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of all tags with the specified attribute in the passed HTML document as a list

    Takes the same arguments as get_elements_text_and_html_by_attribute
    """
    return [pair[1] for pair in get_elements_text_and_html_by_attribute(*args, **kwargs)]


def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document

    This is a generator yielding one (content, whole) tuple per matching element.

    @param attribute     Name of the attribute to look for
    @param value         Required attribute value. Nothing is yielded if it is empty
    @param html          The HTML document to search
    @param tag           Regular expression that the tag name has to match
    @param escape_value  If false, value is used as a regular expression instead of a literal
    """
    if not value:
        return

    # The quotes around the attribute value are optional ('?'), unless the value
    # starts with whitespace or one of "'`=<> (only the first character is checked)
    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    # Matches from the '<' of an opening tag up to and including the wanted attribute
    partial_element_re = rf'''(?x)
        <(?P<tag>{tag})
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        # Extract the complete element that starts where the match starts
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            # Strip one pair of matching quotes surrounding the content, then decode the entities
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole,
        )


class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser that keeps a stack of the currently open tags and raises
    HTMLBreakOnClosingTagException once the first tag it was fed gets closed.
    Can be used as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        """Raised when the closing tag of the outermost element is reached"""

    def __init__(self):
        self.tagstack = collections.deque()
        html.parser.HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # Raising HTMLBreakOnClosingTagException aborts handle_endtag midway, which
        # leaves data in the parser's buffer. That data is of no interest anymore,
        # so this override simply discards it instead of processing it
        return None

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        open_tags = self.tagstack
        if not open_tags:
            raise compat_HTMLParseError('no tags in the stack')

        # Unwind the stack up to and including the tag that is being closed
        matched = False
        while open_tags and not matched:
            matched = open_tags.pop() == tag
        if not matched:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')

        if not open_tags:
            raise self.HTMLBreakOnClosingTagException


# XXX: This should be far less strict
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)

    @returns  Tuple of (content, whole)
    @raises   compat_HTMLParseError if the element cannot be found or is not closed
    """
    def find_or_raise(haystack, needle, exc):
        # Like str.index, but raises the given exception if needle is missing
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    # The content begins right after the first '>' following the opening tag's name
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        # Feed the opening tag on its own so that it ends up at the bottom of the tag stack
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        while offset < len(html):
            # Feed the document in chunks that each end with a candidate closing tag;
            # both positions below are relative to offset
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                # The tag stack got emptied: this closing tag belongs to the opening tag
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')


class HTMLAttributeParser(html.parser.HTMLParser):
    """Minimal HTML parser that records the attributes of the first opening tag it is fed"""

    def __init__(self):
        self.attrs = {}
        html.parser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)
        # Abort parsing: only the first element is of interest
        raise compat_HTMLParseError('done')


class HTMLListAttrsParser(html.parser.HTMLParser):
    """HTML parser that collects the attributes of every <li> opened outside of any other tag"""

    def __init__(self):
        html.parser.HTMLParser.__init__(self)
        self.items = []
        # Number of opening tags seen minus number of closing tags seen
        self._level = 0

    def handle_starttag(self, tag, attrs):
        at_top_level = self._level == 0
        self._level += 1
        if at_top_level and tag == 'li':
            self.items.append(dict(attrs))

    def handle_endtag(self, tag):
        self._level -= 1


def extract_attributes(html_element):
    """Decode the attributes of an HTML element into a dictionary.

    For example, the element
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    gives
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # The parser raises this on purpose once it has seen the first tag
        pass
    return parser.attrs


def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return a list with the attribute dictionary of each of them"""
    list_parser = HTMLListAttrsParser()
    list_parser.feed(webpage)
    list_parser.close()
    return list_parser.items


def clean_html(html):
    """Turn an HTML snippet into readable plain text (None is passed through)"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    substitutions = (
        # Collapse every run of whitespace into a single space
        (r'\s+', ' '),
        # Line breaks and paragraph boundaries become newlines
        (r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n'),
        (r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    text = html
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    # Replace html entities
    return unescapeHTML(text).strip()


class LenientJSONDecoder(json.JSONDecoder):
    """JSON decoder that can pre-process its input and tolerate some malformed documents

    @param transform_source  Optional callable applied to the input string before decoding
    @param ignore_extra      If true, strip leading whitespace, decode only the first JSON
                             value and ignore whatever follows it
    @param close_objects     Number of unterminated objects/arrays that may be closed
                             automatically when the document ends prematurely
    Remaining arguments are passed on to json.JSONDecoder
    """
    # TODO: Write tests
    def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        # Closing one object can take two passes: one that appends a comma
        # and one that replaces this comma with the closing bracket
        self._close_attempts = 2 * close_objects
        super().__init__(*args, **kwargs)

    @staticmethod
    def _close_object(err):
        """Return the document of err repaired at the error position, or None if that is not possible"""
        doc = err.doc[:err.pos]
        # We need to add comma first to get the correct error message
        if err.msg.startswith('Expecting \',\''):
            return doc + ','
        elif not doc.endswith(','):
            return

        if err.msg.startswith('Expecting property name'):
            return doc[:-1] + '}'
        elif err.msg.startswith('Expecting value'):
            return doc[:-1] + ']'

    def decode(self, s):
        """Decode s; on failure raise JSONDecodeError with an excerpt of the input around the error"""
        if self.transform_source:
            s = self.transform_source(s)
        for attempt in range(self._close_attempts + 1):
            try:
                if self.ignore_extra:
                    return self.raw_decode(s.lstrip())[0]
                return super().decode(s)
            except json.JSONDecodeError as e:
                if e.pos is None:
                    raise
                elif attempt < self._close_attempts:
                    # Do not overwrite `s` unless the repair succeeded
                    repaired = self._close_object(e)
                    if repaired is not None:
                        s = repaired
                        continue
                # `e.doc` is the string `e.pos` refers to. Clamp the start of
                # the excerpt so that it cannot wrap around to the end of the document
                excerpt = e.doc[max(0, e.pos - 10):e.pos + 10]
                raise type(e)(f'{e.msg} in {excerpt!r}', e.doc, e.pos)
        assert False, 'Too many attempts to decode JSON'


def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    A filename of '-' stands for the standard output.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt

            # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
            with contextlib.suppress(io.UnsupportedOperation):
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        # Prefer the underlying binary buffer if stdout has one
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    # Two attempts at most: the filename as given, then its sanitize_path() form
    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                # Locking failed; fall back to opening the file without a lock
                # NOTE(review): presumably LockingUnsupportedError is an OSError subclass — confirm
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            # Give up on the second attempt, or if access was denied
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            # Retrying is pointless if sanitizing did not change the name
            if old_filename == filename:
                raise


def timeconvert(timestr):
    """Convert an RFC 2822 time string into a timestamp (None if it cannot be parsed)"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)


def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    @returns            The sanitized string. It is empty only if s is empty
    """
    if s == '':
        return ''

    def replace_insane(char):
        # Returns the replacement for a single character. Substitute characters are
        # prefixed with '\0' so that the clean-up below can tell them apart from
        # characters of the original string; all '\0' are removed at the end
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            # Question marks and ASCII control characters are dropped
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            # Characters of the unicode categories C* and M* are dropped, all others substituted
            return '' if unicodedata.category(char)[0] in 'CM' else '\0_'
        return char

    # Replace look-alike Unicode glyphs
    if restricted and (is_id is NO_DEFAULT or not is_id):
        s = unicodedata.normalize('NFKC', s)
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    # Drop the substitute markers; never return an empty name
    result = result.replace('\0', '') or '_'

    # Note that this is skipped when is_id is unset, since NO_DEFAULT is truthy
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        # Replace a leading dash with an underscore
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result


def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    @param force  Sanitize the path on other platforms too, where it would
                  otherwise be returned unchanged
    """
    # XXX: this handles drive relative paths (c:sth) incorrectly
    if sys.platform == 'win32':
        # `force` only has a meaning on other platforms; reset it so that the
        # `elif force` branch further down cannot apply
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        # NOTE(review): presumably drops the empty component in front of the
        # separator that follows the drive — confirm
        norm_path.pop(0)
    # In every component except '.' and '..', replace each of /<>:"|\?* as well
    # as a trailing whitespace character or dot with '#'
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        # Keep absolute paths absolute
        sanitized_path.insert(0, os.path.sep)
    # TODO: Fix behavioral differences <3.12
    # The workaround using `normpath` only superficially passes tests
    # Ref: https://github.com/python/cpython/pull/100351
    return os.path.normpath(os.path.join(*sanitized_path))


def sanitize_url(url, *, scheme='http'):
    """Fix up URLs that lack a scheme or contain a known scheme typo.

    @param scheme  Scheme to prepend to protocol-less (`//host/path`) URLs
    @returns       The fixed URL, or the URL unchanged if there is nothing to fix.
                   None is passed through
    """
    if url is None:
        return None
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return f'{scheme}:{url}'

    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for pattern, replacement in typo_fixes:
        fixed, count = re.subn(pattern, replacement, url)
        if count:
            return fixed
    return url


def extract_basic_auth(url):
    """Split the credentials embedded in a URL off into a Basic authorization value.

    @returns  Tuple of (URL without credentials, authorization header value).
              If the URL has no user info part, this is (url, None)
    """
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None

    host = parts.hostname
    if host and ':' in host:
        # The brackets of IPv6 literals are not part of `hostname`;
        # restore them so that the rebuilt URL stays valid
        host = f'[{host}]'
    netloc = host if parts.port is None else f'{host}:{parts.port}'
    url = urllib.parse.urlunsplit(parts._replace(netloc=netloc))

    auth_payload = base64.b64encode(
        ('{}:{}'.format(parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'


def expand_path(s):
    """Expand a leading ~ and then the shell variables in the path s"""
    home_expanded = compat_expanduser(s)
    return os.path.expandvars(home_expanded)


def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable, keeping the first occurrences in order

    @param lazy  Return a generator instead of a list
    """
    def _unique():
        # Do not use set since the items can be unhashable
        emitted = []
        for item in iterable:
            if item in emitted:
                continue
            emitted.append(item)
            yield item

    deduplicated = _unique()
    if lazy:
        return deduplicated
    return list(deduplicated)


def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    @param entity_with_semicolon  The entity without the leading `&`, e.g. `amp;`
    @returns                      The decoded character(s), or the entity in its
                                  literal form (`&...;`) if it cannot be decoded
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in html.entities.name2codepoint:
        return chr(html.entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon.
    # E.g. '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in html.entities.html5:
        return html.entities.html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = f'0{numstr}'
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # chr() raises ValueError for numbers beyond the unicode range, but
        # OverflowError for numbers that do not even fit a C integer
        with contextlib.suppress(ValueError, OverflowError):
            return chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return f'&{entity};'


def unescapeHTML(s):
    """Replace the HTML entities in s with the characters they stand for (None is passed through)"""
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


def escapeHTML(text):
    """Replace the characters & < > " ' in text with the corresponding HTML entities"""
    entity_table = str.maketrans({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    })
    return text.translate(entity_table)


class netrc_from_content(netrc.netrc):
    """netrc parser that reads the entries from a string rather than from a file"""

    def __init__(self, content):
        self.hosts = {}
        self.macros = {}
        stream = io.StringIO(content)
        try:
            # '-' takes the place of the file name
            self._parse('-', stream, False)
        finally:
            stream.close()


class Popen(subprocess.Popen):
    """subprocess.Popen with some adjusted defaults and a few convenience helpers

    Differences from subprocess.Popen that are visible in this class:
    - the environment is copied, and the library search path variables are
      restored when running under PyInstaller
    - `text=True` defaults to UTF-8 with `errors='replace'`
    - with `shell=True` on `nt` (and no `executable`), the command line for
      the command interpreter is built here instead of by subprocess
    - `kill()` can wait for the process; `communicate_or_kill()` and `run()` are added
    """
    if sys.platform == 'win32':
        # Shared by all instances and passed to every subprocess.Popen call
        # NOTE(review): presumably keeps the child's console window hidden — confirm
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    @staticmethod
    def _fix_pyinstaller_ld_path(env):
        """Restore LD_LIBRARY_PATH when using PyInstaller
            Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
                 https://github.com/yt-dlp/yt-dlp/issues/4573

        Modifies env in place. Does nothing unless `sys._MEIPASS` exists.
        """
        if not hasattr(sys, '_MEIPASS'):
            return

        def _fix(key):
            # Set the variable to the value saved in `<key>_ORIG`,
            # or remove it if there is no saved value
            orig = env.get(f'{key}_ORIG')
            if orig is None:
                env.pop(key, None)
            else:
                env[key] = orig

        _fix('LD_LIBRARY_PATH')  # Linux
        _fix('DYLD_LIBRARY_PATH')  # macOS

    def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
        """
        @param env    Environment of the process. Defaults to a copy of os.environ.
                      A passed mapping may be modified
        @param text   If True, the streams are opened in text mode, by default
                      using UTF-8 and errors='replace'
        @param shell  Run args through the shell
        Everything else is passed on to subprocess.Popen
        """
        if env is None:
            env = os.environ.copy()
        self._fix_pyinstaller_ld_path(env)

        # Truthy if the streams are in text mode; run() uses this to pick the type of its empty output
        self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')

        if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
            # Build the full command line for the command interpreter here and
            # start it directly (shell=False)
            if not isinstance(args, str):
                args = shell_quote(args, shell=True)
            shell = False
            # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
            env['='] = '"^\n\n"'
            args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'

        super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)

    def __comspec(self):
        """Return the absolute path of the command interpreter: %ComSpec%, or cmd.exe below %SystemRoot%

        Raises FileNotFoundError if the resulting path is not absolute
        """
        comspec = os.environ.get('ComSpec') or os.path.join(
            os.environ.get('SystemRoot', ''), 'System32', 'cmd.exe')
        if os.path.isabs(comspec):
            return comspec
        raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')

    def communicate_or_kill(self, *args, **kwargs):
        """Like communicate(), but kill the process and wait for it if anything is raised"""
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        """Kill the process and, unless timeout is 0, wait for it with the given timeout"""
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, timeout=None, **kwargs):
        """Run a process to completion

        @param timeout  Passed to communicate(); the process is killed if it is exceeded
        @returns        Tuple of (stdout, stderr, returncode). Missing or empty output
                        is returned as an empty str in text mode, else as empty bytes
        """
        with cls(*args, **kwargs) as proc:
            default = '' if proc.__text_mode else b''
            stdout, stderr = proc.communicate_or_kill(timeout=timeout)
            return stdout or default, stderr or default, proc.returncode


def encodeArgument(s):
    """Return the argument s as str; byte strings are decoded as ASCII"""
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    if isinstance(s, str):
        return s
    return s.decode('ascii')


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a duration in milliseconds into hours, minutes, seconds and milliseconds"""
    total_seconds, milliseconds = divmod(msec, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return _timetuple(hours=hours, minutes=minutes, seconds=seconds, milliseconds=milliseconds)


def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [[H:]MM:]SS, leaving out leading zero fields

    @param delim  Separator to put between the fields
    @param msec   Append the milliseconds as `.mmm`
    """
    hours, minutes, seconds, milliseconds = timetuple_from_msec(secs * 1000)
    if hours:
        text = '%d%s%02d%s%02d' % (hours, delim, minutes, delim, seconds)
    elif minutes:
        text = '%d%s%02d' % (minutes, delim, seconds)
    else:
        text = '%d' % seconds
    if not msec:
        return text
    return '%s.%03d' % (text, milliseconds)


def bug_reports_message(before=';'):
    """Return a request to report the issue, to be appended to an error message

    @param before  The text that precedes the request. The request starts
                   with a capital letter if this is empty or ends a sentence
    """
    from ..update import REPOSITORY

    msg = (f'please report this issue on  https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U')

    prefix = before.rstrip()
    starts_sentence = not prefix or prefix.endswith(('.', '!', '?'))
    if starts_sentence:
        msg = msg[0].title() + msg[1:]

    if not prefix:
        return msg
    return prefix + ' ' + msg


class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors.

    The message is the msg argument if given, else the `msg` class
    attribute if a subclass defines one, else the name of the class.
    """
    msg = None

    def __init__(self, msg=None):
        if msg is None:
            if self.msg is None:
                self.msg = type(self).__name__
        else:
            self.msg = msg
        super().__init__(self.msg)


class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.

        @param msg       The error message proper; kept as a string in `orig_msg`
        @param cause     Exception that caused this error; included in the message
        @param video_id  ID to prefix the message with
        @param ie        Name to prefix the message with, in brackets
        """
        from ..networking.exceptions import network_exceptions
        # An error raised while a network exception is being handled is always expected
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        # If an ExtractorError is currently being handled, adopt the exc_info it preserved
        if isinstance(self.exc_info[1], ExtractorError):
            self.exc_info = self.exc_info[1].exc_info
        super().__init__(self.__msg)

    @property
    def __msg(self):
        # The complete message, assembled from the current attribute values:
        # '[ie] video_id: orig_msg (caused by ...)', followed by the request to
        # report the bug unless the error is expected
        return ''.join((
            format_field(self.ie, None, '[%s] '),
            format_field(self.video_id, None, '%s: '),
            self.orig_msg,
            format_field(self.cause, None, ' (caused by %r)'),
            '' if self.expected else bug_reports_message()))

    def format_traceback(self):
        """Return the formatted traceback `tb` and/or that of `cause`, or None if there is neither"""
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None

    def __setattr__(self, name, value):
        super().__setattr__(name, value)
        # Once `msg` has been set, keep `msg` and `args` in sync with the other attributes.
        # `msg` and `args` themselves are excluded since setting them here would recurse
        if getattr(self, 'msg', None) and name not in ('msg', 'args'):
            self.msg = self.__msg or type(self).__name__
            self.args = (self.msg, )  # Cannot be property


class UnsupportedError(ExtractorError):
    """Error for a URL that is not supported; always flagged as expected."""

    def __init__(self, url):
        message = f'Unsupported URL: {url}'
        super().__init__(message, expected=True)
        self.url = url


class RegexNotFoundError(ExtractorError):
    """Error raised when a regular expression did not match"""


class GeoRestrictedError(ExtractorError):
    """Geographic restriction error.

    May be raised when a video is not available from the user's geographic
    location due to restrictions imposed by a website.
    The error is always marked as expected; `countries` is stored as given.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # `expected` is forced to True regardless of what the caller passed
        super().__init__(msg, **{**kwargs, 'expected': True})
        self.countries = countries


class UserNotLive(ExtractorError):
    """Error raised when a channel/user is not live; always marked as expected"""

    def __init__(self, msg=None, **kwargs):
        if not msg:
            msg = 'The channel is not currently live'
        kwargs.update(expected=True)
        super().__init__(msg, **kwargs)


class DownloadError(YoutubeDLError):
    """Download error.

    May be raised by FileDownloader objects that are not configured to
    continue on errors; carries the appropriate error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super().__init__(msg=msg)
        self.exc_info = exc_info


class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    # Default message, used when the exception is created without one
    msg = 'Entry not found in info'


class SameFileError(YoutubeDLError):
    """Same file error.

    Raised by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        # Append the offending filename (if any) to the class-level message
        if filename is not None:
            self.msg = f'{self.msg}: {filename}'
        super().__init__(self.msg)


class PostProcessingError(YoutubeDLError):
    """Post-processing exception.

    This exception may be raised by a PostProcessor's .run() method to
    indicate an error in the post-processing task.
    """


class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    # Default message, used when the exception is created without one
    msg = 'The download was cancelled'


class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Default message, used when the exception is created without one
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'


class RejectedVideoReached(DownloadCancelled):
    """ --break-match-filter triggered """
    # Default message, used when the exception is created without one
    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'


class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Default message, used when the exception is created without one
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'


class ReExtractInfo(YoutubeDLError):
    """ Video info needs to be re-extracted; `expected` is stored as given. """

    def __init__(self, msg, expected=False):
        super().__init__(msg=msg)
        self.expected = expected


class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        # Always uses the class-level message and is never "expected"
        super().__init__(msg=self.msg, expected=False)


class UnavailableVideoError(YoutubeDLError):
    """Unavailable format error.

    Raised when a video is requested in a format that is not available
    for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        # Append the underlying error (if any) to the class-level message
        if err is not None:
            self.msg = f'{self.msg}: {err}'
        super().__init__(self.msg)


class ContentTooShortError(YoutubeDLError):
    """Content too short error.

    May be raised by FileDownloader objects when a downloaded file is smaller
    than what the server announced first, indicating the connection was
    probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = f'Downloaded {downloaded} bytes, expected {expected} bytes'
        super().__init__(message)
        # Both sizes are in bytes
        self.downloaded, self.expected = downloaded, expected


class XAttrMetadataError(YoutubeDLError):
    """Error while writing xattr metadata.

    `reason` is derived from the errno `code` and/or the text of `msg` and is
    one of 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code, self.msg = code, msg

        # Classify the failure from the error code or the message text
        no_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg or 'Disk quota exceeded' in msg)
        if no_space:
            self.reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'


class XAttrUnavailableError(YoutubeDLError):
    # NOTE(review): presumably raised when no usable xattr backend is available - confirm against callers
    pass


def is_path_like(f):
    """Return True if `f` is a str, bytes or os.PathLike object"""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)


def extract_timezone(date_str, default=None):
    """
    Split a trailing timezone off a date string

    A numeric offset ([+-]hh[:]mm) or "Z" is looked for first; otherwise a
    trailing upper-case timezone name is looked up in TIMEZONE_NAMES.

    @param default  Value used when no offset could be determined. A falsy value
                    means a zero timedelta; NO_DEFAULT keeps the result as None
    @returns        (timezone, date_str) where timezone is a timedelta (or None)
                    and date_str has the recognized timezone removed
    """
    offset_match = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                   [ ]?                                          # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    timezone = None

    if offset_match:
        date_str = date_str[:-len(offset_match.group('tz'))]
        sign_str = offset_match.group('sign')
        if sign_str:  # not set for a plain "Z"
            direction = 1 if sign_str == '+' else -1
            timezone = dt.timedelta(
                hours=direction * int(offset_match.group('hours')),
                minutes=direction * int(offset_match.group('minutes')))
    else:
        name_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
        tz_name = name_match and name_match.group('tz').strip()
        timezone = TIMEZONE_NAMES.get(tz_name)
        if timezone is not None:
            # Only strip the name when it is a known timezone
            date_str = date_str[:-len(name_match.group('tz'))]
            timezone = dt.timedelta(hours=timezone)

    if timezone is None and default is not NO_DEFAULT:
        timezone = default or dt.timedelta()

    return timezone, date_str


def parse_iso8601(date_str, delimiter='T', timezone=None):
    """
    Return a UNIX timestamp from the given date

    @param delimiter  Separator between the date and time parts
    @param timezone   Fallback offset, passed on to extract_timezone
    @returns          Integer timestamp, or None if date_str is None or unparsable
    """
    if date_str is None:
        return None

    # Fractional seconds are discarded
    date_str = re.sub(r'\.[0-9]+', '', date_str)
    timezone, date_str = extract_timezone(date_str, timezone)

    try:
        parsed = dt.datetime.strptime(date_str, f'%Y-%m-%d{delimiter}%H:%M:%S')
        return calendar.timegm((parsed - timezone).timetuple())
    except (ValueError, TypeError):
        return None


def date_formats(day_first=True):
    """Return the list of known date formats, day-first or month-first"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST


def unified_strdate(date_str, day_first=True):
    """
    Return a string with the date in the format YYYYMMDD

    Every known format is tried and the last one that parses is used;
    RFC 2822 style parsing is the fallback. Returns None if nothing matches.
    """
    if date_str is None:
        return None

    # Commas become spaces; AM/PM (+ timezone name) and any timezone are dropped
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    date_str = extract_timezone(date_str)[1]

    upload_date = None
    for expression in date_formats(day_first):
        try:
            upload_date = dt.datetime.strptime(date_str, expression).strftime('%Y%m%d')
        except ValueError:
            continue

    if upload_date is None:
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = dt.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if upload_date is None else str(upload_date)


def unified_timestamp(date_str, day_first=True):
    """
    Return a UNIX timestamp parsed from a free-form date string

    The first known format that parses is used; RFC 2822 style parsing is the
    fallback. Returns None for non-string input or if nothing matches.
    """
    if not isinstance(date_str, str):
        return None

    # Drop commas, pipes and weekday names, then collapse whitespace
    without_weekday = re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str)
    date_str = re.sub(r'\s+', ' ', without_weekday)

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    tz_match = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if tz_match:
        date_str = date_str[:-len(tz_match.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    nano_match = re.search(
        r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if nano_match:
        date_str = nano_match.group(1)

    for expression in date_formats(day_first):
        try:
            dt_ = dt.datetime.strptime(date_str, expression) - timezone + dt.timedelta(hours=pm_delta)
            return calendar.timegm(dt_.timetuple())
        except ValueError:
            continue

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
    return None


def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL; return `default_ext` if it cannot be determined"""
    if url is None or '.' not in url:
        return default_ext

    # Text after the last dot of the part preceding the query string
    without_query = url.partition('?')[0]
    guess = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = guess.rstrip('/')
    if trimmed in KNOWN_EXTENSIONS:
        return trimmed
    return default_ext


def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return `filename` with its extension replaced by "<sub_lang>.<sub_format>" """
    sub_ext = '.'.join((sub_lang, sub_format))
    return replace_extension(filename, sub_ext, expected_real_ext)


def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.
    Supported format:
        (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = precision == 'auto'
    if auto_precision:
        precision = 'microsecond'

    today = datetime_round(dt.datetime.now(dt.timezone.utc), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - dt.timedelta(days=1)

    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is None:
        # A plain DATE without any offset
        return datetime_round(dt.datetime.strptime(date_str, format), precision)

    # The part before the offset is itself resolved recursively
    start_time = datetime_from_str(match.group('start'), precision, format)
    time = int(match.group('time'))
    if match.group('sign') == '-':
        time = -time
    unit = match.group('unit')

    if unit in ('month', 'year'):
        months = time * 12 if unit == 'year' else time
        new_date = datetime_add_months(start_time, months)
        unit = 'day'
    else:
        if unit == 'week':
            unit, time = 'day', time * 7
        new_date = start_time + dt.timedelta(**{unit + 's': time})

    return datetime_round(new_date, unit) if auto_precision else new_date


def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    @raises ValueError  if strict is set and date_str does not follow these patterns
    """
    if strict:
        allowed = r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?'
        if not re.fullmatch(allowed, date_str):
            raise ValueError(f'Invalid date format "{date_str}"')
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()


def datetime_add_months(dt_, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    # Work with a zero-based month index so that floor division yields the year carry
    year_carry, month_index = divmod(dt_.month + months - 1, 12)
    year = dt_.year + year_carry
    month = month_index + 1
    last_day = calendar.monthrange(year, month)[1]
    return dt_.replace(year, month, min(dt_.day, last_day))


def datetime_round(dt_, precision='day'):
    """
    Round a datetime object's time to a specific precision

    @param precision  microsecond|second|minute|hour|day
    @returns          dt_ unchanged for 'microsecond', otherwise a new UTC datetime
    """
    if precision == 'microsecond':
        return dt_

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    step = unit_seconds[precision]
    seconds = calendar.timegm(dt_.timetuple())
    # Round half up to the nearest multiple of `step`
    timestamp = ((seconds + step / 2) // step) * step
    return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc)


def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format
    Any other string is returned unchanged"""
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return date_str if parts is None else '-'.join(parts.groups())


class DateRange:
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date_from_str (strict mode)

        A missing start/end means the earliest/latest representable date.
        Raises ValueError if start is after end.
        """
        self.start = dt.datetime.min.date() if start is None else date_from_str(start, strict=True)
        self.end = dt.datetime.max.date() if end is None else date_from_str(end, strict=True)
        if self.start > self.end:
            raise ValueError(f'Date range: "{self}" , the start date must be before the end date')

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date (a date object or a date string) is in the range"""
        if not isinstance(date, dt.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __repr__(self):
        cls_name = type(self).__name__
        return f'{__name__}.{cls_name}({self.start.isoformat()!r}, {self.end.isoformat()!r})'

    def __str__(self):
        return f'{self.start} to {self.end}'

    def __eq__(self, other):
        if not isinstance(other, DateRange):
            return False
        return (self.start, self.end) == (other.start, other.end)


@functools.cache
def system_identifier():
    """Return a (cached) one-line description of the Python runtime, platform, OpenSSL and libc"""
    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]

    try:
        libc_ver = platform.libc_ver()
    except OSError:  # We may not have access to the executable
        libc_ver = []
    libc_info = format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s')

    return 'Python {} ({} {} {}) - {} ({}{})'.format(
        platform.python_version(),
        implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        libc_info,
    )


@functools.cache
def get_windows_version():
    """ Get Windows version. returns () if it's not running on Windows """
    if compat_os_name != 'nt':
        return ()
    return version_tuple(platform.win32_ver()[1])


def write_string(s, out=None, encoding=None):
    """
    Write the string `s` to `out` (default: sys.stderr) and flush it

    Binary streams and streams exposing a `buffer` receive encoded bytes
    (invalid characters are ignored); other streams receive the string itself.
    """
    assert isinstance(s, str)
    out = out or sys.stderr
    # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
    if not out:
        return

    if compat_os_name == 'nt' and supports_terminal_sequences(out):
        s = re.sub(r'([\r\n]+)', r' \1', s)

    # `mode` might be `None` (Ref: https://github.com/yt-dlp/yt-dlp/issues/8816)
    stream_mode = getattr(out, 'mode', None) or ''
    if 'b' in stream_mode:
        target = out
        enc = encoding or preferredencoding()
    elif hasattr(out, 'buffer'):
        target = out.buffer
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
    else:
        target, enc = out, None

    payload = s.encode(enc, 'ignore') if enc else s
    target.write(payload)
    out.flush()


# TODO: Use global logger
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """
    Report a deprecation

    When running as CLI, each distinct message is reported only once, through
    `printer` if given or write_string otherwise (kwargs are passed along).
    Otherwise a DeprecationWarning is issued via the warnings module.
    """
    from .. import _IN_CLI
    if not _IN_CLI:
        import warnings
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
        return

    already_shown = deprecation_warning._cache
    if msg in already_shown:
        return
    already_shown.add(msg)
    if printer:
        return printer(f'{msg}{bug_reports_message()}', **kwargs)
    return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)


# Messages that have already been reported in CLI mode
deprecation_warning._cache = set()


def bytes_to_intlist(bs):
    """Convert a bytes-like object (or a string of characters) into a list of ints"""
    if not bs:
        return []
    # Indexing bytes yields ints; indexing str yields 1-character strings
    if isinstance(bs[0], int):
        return list(bs)
    return list(map(ord, bs))


def intlist_to_bytes(xs):
    """Pack a sequence of ints (0-255) into a bytes object"""
    if not xs:
        return b''
    layout = '%dB' % len(xs)
    return struct.pack(layout, *xs)


class LockingUnsupportedError(OSError):
    """OSError raised when file locking is not supported"""
    msg = 'File locking is not supported'

    def __init__(self):
        message = self.msg
        super().__init__(message)


# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.WinDLL('kernel32')
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED),  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED),  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)

        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
                          0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
            raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError(f'Unlocking file failed: {ctypes.FormatError()!r}')

else:
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            if not block:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(f, flags)
            except BlockingIOError:
                raise
            except OSError:  # AOSP does not have flock()
                fcntl.lockf(f, flags)

        def _unlock_file(f):
            with contextlib.suppress(OSError):
                return fcntl.flock(f, fcntl.LOCK_UN)
            with contextlib.suppress(OSError):
                return fcntl.lockf(f, fcntl.LOCK_UN)  # AOSP does not have flock()
            return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB)  # virtiofs needs LOCK_NB on unlocking

    except ImportError:

        def _lock_file(f, exclusive, block):
            raise LockingUnsupportedError

        def _unlock_file(f):
            raise LockingUnsupportedError


class locked_file:
    """
    File wrapper that holds a lock on the file while used as a context manager

    The file is opened in __init__, locked in __enter__ (also available as
    open()) and unlocked + closed in __exit__ (also available as close()).
    Unknown attributes and iteration are delegated to the underlying file object.
    """
    locked = False

    def __init__(self, filename, mode, block=True, encoding=None):
        """
        @param mode   One of 'r', 'rb', 'a', 'ab', 'w', 'wb'; anything else raises NotImplementedError
        @param block  Passed to _lock_file: whether to wait for the lock
        """
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        flags = functools.reduce(operator.ior, (
            getattr(os, 'O_CLOEXEC', 0),  # UNIX only
            getattr(os, 'O_BINARY', 0),  # Windows only
            getattr(os, 'O_NOINHERIT', 0),  # Windows only
            os.O_CREAT if writable else 0,  # O_TRUNC only after locking
            os.O_APPEND if 'a' in mode else 0,
            os.O_EXCL if 'x' in mode else 0,
            os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
        ))

        self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)

    def __enter__(self):
        # Read modes take a shared lock, everything else an exclusive one
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            # Do not leave the file open if the lock could not be acquired
            self.f.close()
            raise
        if 'w' in self.mode:
            # The file is truncated only now, i.e. once the lock is held
            try:
                self.f.truncate()
            except OSError as e:
                if e.errno not in (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                ):
                    raise
        return self

    def unlock(self):
        """Release the lock if it is held; `locked` is reset even if unlocking fails"""
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        # The file is closed even if unlocking raised
        try:
            self.unlock()
        finally:
            self.f.close()

    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        # Only called for attributes not found on the wrapper itself
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)


@functools.cache
def get_filesystem_encoding():
    """Return the (cached) filesystem encoding, falling back to 'utf-8' if it is None"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding


# Translation tables applied by `shell_quote` to arguments that need quoting on Windows:
# `_WINDOWS_QUOTE_TRANS` when no shell is involved, `_CMD_QUOTE_TRANS` when `shell=True`
_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
_CMD_QUOTE_TRANS = str.maketrans({
    # Keep quotes balanced by replacing them with `""` instead of `\\"`
    '"': '""',
    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
    # `=` should be unique since variables containing `=` cannot be set using cmd
    '\n': '%=%',
    '\r': '%=%',
    # Use zero length variable replacement so `%` doesn't get expanded
    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
    '%': '%%cd:~,%',
})


def shell_quote(args, *, shell=False):
    """
    Join one or more arguments into a single, quoted command line string

    @param shell  On Windows, quote for cmd instead of a plain process invocation.
                  Has no effect elsewhere, where shlex.join is used
    """
    args = list(variadic(args))

    if compat_os_name != 'nt':
        return shlex.join(args)

    trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS

    def quote_argument(arg):
        # Arguments made up of safe characters only are passed through as-is
        if re.fullmatch(r'[\w#$*\-+./:?@\\]+', arg, re.ASCII):
            return arg
        # Double the backslashes that precede a quote or the end of the argument
        escaped = re.sub(r'(\\+)("|$)', r'\1\1\2', arg).translate(trans)
        return f'"{escaped}"'

    return ' '.join(map(quote_argument, args))


def smuggle_url(url, data):
    """
    Pass additional data in a URL for internal use.

    @param data  JSON-serializable mapping to attach to the URL. It is not modified.
                 Data already smuggled in `url` takes precedence over `data`
    @returns     The URL with the combined data appended as fragment
    """
    url, idata = unsmuggle_url(url, {})
    # Merge into a new dict so that the caller's `data` is left untouched
    merged = {**data, **idata}
    sdata = urllib.parse.urlencode(
        {'__youtubedl_smuggle': json.dumps(merged)})
    return url + '#' + sdata


def unsmuggle_url(smug_url, default=None):
    """
    Split a smuggled URL into the original URL and the data attached to it

    @returns  (url, data); (smug_url, default) if no data was smuggled
    """
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = urllib.parse.parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)


def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """
    Formats numbers with decimal suffixes like K, M, etc

    @param fmt     %-format string receiving (scaled number, suffix)
    @param factor  Base of the scale; with 1024 the binary suffixes (Ki, Mi, ...) are used
    @returns       The formatted string, or None if num is not a number or is negative
    """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    # Clamp on both ends: values below 1/factor would otherwise produce a negative
    # exponent, which would index the suffixes from the end and scale the number up
    exponent = 0 if num == 0 else max(0, min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)


def format_bytes(bytes):
    """Format a byte count using binary suffixes (e.g. KiB) with two decimals; 'N/A' if not possible"""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted or 'N/A'


def lookup_unit_table(unit_table, s, strict=False):
    """
    Parse "<number><unit>" from `s` and scale the number by the unit's multiplier

    @param unit_table  Mapping of unit name to multiplier
    @param strict      Require the whole string to match and only accept "." as
                       decimal separator; otherwise "," is accepted as well and
                       only the start of the string needs to match
    @returns           The rounded result, or None if `s` does not match
    """
    if strict:
        num_re, matcher = NUMBER_RE, re.fullmatch
    else:
        num_re, matcher = NUMBER_RE.replace(R'\.', '[,.]'), re.match
    units_re = '|'.join(map(re.escape, unit_table))

    m = matcher(rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
    if not m:
        return None

    number = float(m.group('num').replace(',', '.'))
    return round(number * unit_table[m.group('unit')])


def parse_bytes(s):
    """Parse a string indicating a byte quantity into an integer

    Suffixes K, M, G, ... (case-insensitive) are powers of 1024"""
    suffixes = ['', *'KMGTPEZY']
    unit_table = {suffix: 1024**power for power, suffix in enumerate(suffixes)}
    return lookup_unit_table(unit_table, s.upper(), strict=True)


def parse_filesize(s):
    """Parse a human-readable file size (e.g. "5 MiB") into a number of bytes

    Returns None if `s` is None or cannot be parsed by lookup_unit_table
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    # NB: Units are matched case-sensitively and the casing decides between
    # the decimal (1000) and the binary (1024) multiplier
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)


def parse_count(s):
    """Parse a count such as "1,234", "1.2M" or "Views 15k" into an integer; None if not possible"""
    if s is None:
        return None

    # Drop a leading non-numeric label (up to and including the whitespace after it)
    s = re.sub(r'^[^\d]+\s', '', s).strip()

    # Plain number, possibly with thousands separators
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    _UNIT_TABLE = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
        'b': 1000 ** 3,
        'B': 1000 ** 3,
    }
    scaled = lookup_unit_table(_UNIT_TABLE, s)
    if scaled is not None:
        return scaled

    # Fall back to a leading number followed by whitespace or the end of the string
    mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
    return str_to_int(mobj.group(1)) if mobj else None


def parse_resolution(s, *, lenient=False):
    """
    Extract the resolution from a string

    Recognizes "<width>x<height>", "<height>p"/"<height>i" and "4k"/"8k".

    @param lenient  Allow "<width>x<height>" to be directly surrounded by alphanumerics
    @returns        Dict with "width" and/or "height"; empty if nothing was found
    """
    if s is None:
        return {}

    if lenient:
        dimensions_re = r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)'
    else:
        dimensions_re = r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])'
    dimensions = re.search(dimensions_re, s)
    if dimensions:
        width, height = dimensions.group('w', 'h')
        return {'width': int(width), 'height': int(height)}

    scan_lines = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    # 4k => 2160, 8k => 4320
    kilo = re.search(r'\b([48])[kK]\b', s)
    if kilo:
        return {'height': int(kilo.group(1)) * 540}

    return {}


def parse_bitrate(s):
    """Return the integer in front of "kbps" found in the string `s`; None if there is none"""
    if not isinstance(s, str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None


def month_by_name(name, lang='en'):
    """ Return the number of a month by its (locale-independent) full name in
        the language `lang`; the English names are used for unknown languages.
        Returns None if the name is not found """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in month_names:
        return None
    return month_names.index(name) + 1


def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
        abbreviations (first 3 letters); None if the abbreviation is unknown """

    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    if abbrev not in abbreviations:
        return None
    return abbreviations.index(abbrev) + 1


def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML, except those that already start an entity"""
    bare_ampersand = re.compile(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)')
    return bare_ampersand.sub('&amp;', xml_str)


def setproctitle(title):
    """
    Set the process name to `title` using prctl(PR_SET_NAME) from libc.so.6

    Best effort: silently does nothing if ctypes, libc or prctl is unavailable.
    """
    assert isinstance(title, str)

    # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
    try:
        import ctypes
    except ImportError:
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except (OSError, TypeError):
        return

    # Initializing the buffer from the bytes makes it one item larger than the
    # title, so that it is always NUL-terminated as prctl expects
    buf = ctypes.create_string_buffer(title.encode())
    try:
        # PR_SET_NAME = 15      Ref: /usr/include/linux/prctl.h
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this


def remove_start(s, start):
    """Return `s` without the prefix `start`; `s` is returned as-is if it is None or lacks the prefix"""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]


def remove_end(s, end):
    """Return `s` without the suffix `end`; `s` is returned as-is if it is None or lacks the suffix"""
    if s is None or not s.endswith(end):
        return s
    # An empty suffix must be special-cased, since `s[:-0]` is an empty string
    return s[:-len(end)] if end else s


def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding `s`, if present"""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s


def get_domain(url):
    """
    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"

    Returns the network location of the URL without a leading "www.", or None if empty
    """
    netloc = urllib.parse.urlparse(url).netloc
    return remove_start(netloc, 'www.') or None


def url_basename(url):
    """Return the last segment of the path of `url` (ignoring leading/trailing slashes)"""
    trimmed_path = urllib.parse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]


def base_url(url):
    """Return the http(s) URL up to and including the last "/" before any query or fragment"""
    prefix = re.match(r'https?://[^?#]+/', url)
    return prefix.group(0)


def urljoin(base, path):
    """
    Join `path` onto `base`, accepting str or bytes for both

    Returns `path` itself if it is already absolute or protocol-relative, and
    None if `path` is empty/not a string or `base` is not a (protocol-relative) http(s) URL.
    """
    if isinstance(path, bytes):
        path = path.decode()
    if not isinstance(path, str) or not path:
        return None
    # Already has a scheme (or is protocol-relative): nothing to join
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)


def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """
    Convert `v` to an int, multiplied by `invscale` and floor-divided by `scale`

    @param get_attr  If given, the attribute of this name is read from `v` and converted instead
    @returns         The integer, or `default` if the conversion fails
    """
    if v is not None and get_attr:
        v = getattr(v, get_attr, None)
    try:
        number = int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
    return number


def str_or_none(v, default=None):
    """Return str(v), or `default` if `v` is None"""
    if v is None:
        return default
    return str(v)


def str_to_int(int_str):
    """ A more relaxed version of int_or_none: the characters ",", "." and "+"
        are removed from strings before conversion. Ints are returned as-is
        and any other type yields None """
    if isinstance(int_str, int):
        return int_str
    if not isinstance(int_str, str):
        return None
    digits = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(digits)


def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert `v` to float, multiply by `invscale` and divide by `scale`

    Returns `default` if `v` is None or the conversion raises ValueError or TypeError.
    """
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        result = default
    return result


def bool_or_none(v, default=None):
    """Return `v` if it is a bool; otherwise return `default`"""
    if isinstance(v, bool):
        return v
    return default


def strip_or_none(v, default=None):
    """Return `v` stripped of surrounding whitespace if it is a str; otherwise return `default`"""
    if not isinstance(v, str):
        return default
    return v.strip()


def url_or_none(url):
    """Return the stripped `url` if it looks like a URL; otherwise return None

    Accepted are protocol-relative URLs ("//...") and the
    http(s), rtmp*/rtmfp, rtsp*, mms and ftp(s) schemes.
    """
    if not url or not isinstance(url, str):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None


def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
    """Format a timestamp or date string using `date_format`

    @param timestamp    int/float unix timestamp (interpreted as UTC), or a str in YYYYMMDD form
    @param date_format  strftime format; "%s" is replaced manually with the unix timestamp
    @param default      value returned when `timestamp` cannot be interpreted or formatted
    @returns            the formatted str, or `default`
    """
    datetime_object = None
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Using naive datetime here can break timestamp() in Windows
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
            # Also, dt.datetime.fromtimestamp breaks for negative timestamps
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
            datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc)
                               + dt.timedelta(seconds=timestamp))
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d')
        # For any other type datetime_object stays None and the
        # attribute access below raises AttributeError => default
        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError, OverflowError):
        # OverflowError: infinite timestamps, or ones too large for timedelta/datetime
        # (e.g. a timestamp in milliseconds passed as seconds)
        return default


def parse_duration(s):
    """Parse a duration string into a number of seconds

    Three notations are tried in order:
      1. colon separated "[[[DD:]HH:]MM:]SS[.ms]" (the fraction may also follow a colon)
      2. unit based, e.g. "PT1H30M", "2 days, 3 hours", "10m 5.5s"
         (years, months and weeks are matched but not counted)
      3. a possibly fractional number of hours or minutes, e.g. "2.5 hours", "87 Min."
    A trailing "Z" is tolerated in all of them.

    @returns    the duration in seconds as a float, or None if `s` is not a str
                or cannot be parsed
    """
    if not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    days, hours, mins, secs, ms = [None] * 5
    m = re.match(r'''(?x)
            (?P<before_secs>
                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
            (?P<ms>[.:][0-9]+)?Z?$
        ''', s)
    if m:
        days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
    else:
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?,?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?,?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?,?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            # The only capturing groups are days, hours, mins, secs, ms (in this order)
            days, hours, mins, secs, ms = m.groups()
        else:
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    if ms:
        # The fraction can be written as ":050"; normalize it so that float() accepts it
        ms = ms.replace(':', '.')
    try:
        return sum(float(part or 0) * mult for part, mult in (
            (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
    except ValueError:
        # [0-9.]+ also matches malformed numbers such as "1.2.3" or "."
        return None


def _change_extension(prepend, filename, ext, expected_real_ext=None):
    """Replace the extension of `filename` with `ext`, or insert `ext` before it

    If `expected_real_ext` is given and differs from the current extension,
    `ext` is simply appended to the unmodified filename.
    With `prepend`, an existing extension is kept and `ext` is put in front of it.
    `ext` is passed through _UnsafeExtensionError.sanitize_extension.
    """
    stem, current_ext = os.path.splitext(filename)
    ext_matches = not expected_real_ext or current_ext[1:] == expected_real_ext

    if ext_matches and prepend and current_ext:
        _UnsafeExtensionError.sanitize_extension(ext, prepend=True)
        return f'{stem}.{ext}{current_ext}'

    base = stem if ext_matches else filename
    return f'{base}.{_UnsafeExtensionError.sanitize_extension(ext)}'


# Public variants with the `prepend` flag bound
prepend_extension = functools.partial(_change_extension, True)
replace_extension = functools.partial(_change_extension, False)


def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)
    Returns False if running the binary raises OSError """
    cmd = [exe, *args]
    try:
        Popen.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        return False
    else:
        return exe


def _get_exe_version_output(exe, args):
    """Run `exe` with `args` and return its combined stdout/stderr as text

    Returns False if the process could not be run (OSError)
    and None if it exited with a non-zero return code.
    """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        stdout, _, ret = Popen.run(
            [encodeArgument(exe), *args], text=True,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError:
        return False
    return None if ret else stdout


def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from the version output of a program

    The first group of `version_re` (default: the token following "version")
    is returned; `unrecognized` is returned if the regex does not match.
    """
    assert isinstance(output, str)
    pattern = r'version\s+([-0-9._a-zA-Z]+)' if version_re is None else version_re
    mobj = re.search(pattern, output)
    return mobj.group(1) if mobj else unrecognized


def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized=('present', 'broken')):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    `unrecognized` holds one or two fallbacks: the first one is used when the
    output has no recognizable version, the last one when the executable
    exits with an error """
    unrecognized = variadic(unrecognized)
    assert len(unrecognized) in (1, 2)
    out = _get_exe_version_output(exe, args)
    if out is None:
        return unrecognized[-1]
    if not out:
        # False (could not be run) or empty output is passed through unchanged
        return out
    return detect_exe_version(out, version_re, unrecognized[0])


def frange(start=0, stop=None, step=1):
    """Float range

    Like range(), but the arguments need not be integers.
    Nothing is yielded when `step` is zero."""
    if stop is None:
        start, stop = 0, start
    if not step:
        sign = 0
    elif step > 0:
        sign = 1
    else:
        sign = -1
    current = start
    # Multiplying by the sign turns the descending case into the same comparison
    while sign * current < sign * stop:
        yield current
        current += step


class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList

    Items are pulled from the iterable only when needed and are kept in a cache.
    Anything that needs to know the end of the sequence (len, negative indices,
    open-ended slices, reversed access, repr) consumes the entire iterable."""

    class IndexError(IndexError):  # noqa: A001
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        self._iterable = iter(iterable)
        self._cache = _cache if _cache is not None else []
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # Replay what has already been evaluated, then continue with the iterable
        yield from self._cache
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        """Move all remaining items into the cache and return the cache itself"""
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        items = self._exhaust()
        return items[::-1] if self._reversed else items[::1]

    @staticmethod
    def _reverse_index(x):
        """Map an index of the reversed view to the equivalent index of the cache"""
        if x is None:
            return None
        return ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')

        needs_everything = (
            (start or 0) < 0 or (stop or 0) < 0
            or (start is None and step < 0)
            or (stop is None and step > 0))
        if needs_everything:
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
        else:
            # Evaluate only as many items as are needed to reach the highest index
            missing = max(start or 0, stop or 0) - len(self._cache) + 1
            if missing > 0:
                self._cache.extend(itertools.islice(self._iterable, missing))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Only the first item (of the current orientation) needs to be evaluated
        probe = -1 if self._reversed else 0
        try:
            self[probe]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        return len(self._exhaust())

    def __reversed__(self):
        # The new object shares both the iterable and the cache with this one
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        # The copy shares both the iterable and the cache with this one
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())


class PagedList:
    """Base class for lists whose items are fetched page by page

    `pagefunc(pagenum)` must return an iterable with the entries of that page.
    Subclasses implement `_getslice`."""

    class IndexError(IndexError):  # noqa: A001
        pass

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._pagecount = float('inf')
        self._use_cache = use_cache
        self._cache = {}

    def getpage(self, pagenum):
        """Return the entries of page `pagenum` as a list, using the cache if possible"""
        page_results = self._cache.get(pagenum)
        if page_results is None:
            if pagenum > self._pagecount:
                # Beyond the last known page
                page_results = []
            else:
                page_results = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = page_results
        return page_results

    def getslice(self, start=0, end=None):
        """Return the entries with (0-based) index in [start, end) as a list"""
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not (isinstance(idx, int) and idx >= 0):
            raise TypeError('indices must be non-negative integers')
        found = self.getslice(idx, idx + 1)
        if found:
            return found[0]
        raise self.IndexError

    def __bool__(self):
        first = self.getslice(0, 1)
        return bool(first)


class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        first_page = start // self._pagesize
        for pagenum in itertools.count(first_page):
            page_first = pagenum * self._pagesize
            page_limit = pagenum * self._pagesize + self._pagesize
            if start >= page_limit:
                continue

            # Part of this page that lies within [start, end)
            skip = 0
            if page_first <= start < page_limit:
                skip = start % self._pagesize
            cut = None
            if end is not None and page_first <= end <= page_limit:
                cut = ((end - 1) % self._pagesize) + 1

            try:
                page_results = self.getpage(pagenum)
            except Exception:
                # Remember the last page that could be fetched before re-raising
                self._pagecount = pagenum - 1
                raise
            if skip != 0 or cut is not None:
                page_results = page_results[skip:cut]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            page_is_short = len(page_results) + skip < self._pagesize
            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if page_is_short or end == page_limit:
                break


class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        # Caching is always enabled for this variant
        PagedList.__init__(self, pagefunc, pagesize, use_cache=True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        first_page = start // self._pagesize
        if end is None:
            last_page, remaining = self._pagecount, None
        else:
            last_page = min(self._pagecount, end // self._pagesize + 1)
            remaining = end - start
        # Number of entries to drop from the beginning of the first page
        offset = start - first_page * self._pagesize

        for pagenum in range(first_page, last_page):
            page = self.getpage(pagenum)
            if offset:
                page = page[offset:]
                offset = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested range
                    yield from page[:remaining]
                    break
                remaining -= len(page)
            yield from page


class PlaylistEntries:
    """Access to the "entries" of a playlist info dict using 1-based playlist indices

    Indexing (with an int or a slice) returns a generator of (playlist_index, entry) tuples.
    """
    # Placeholder for the positions that have no entry in an incomplete playlist
    MissingEntry = object()
    # Whether the end of the entries is known; set once an index past the end has been requested
    is_exhausted = False

    def __init__(self, ydl, info_dict):
        """
        @param ydl          object providing `params`, `report_warning`, `_match_entry`
                            and (through its type) `_handle_extraction_exceptions`
        @param info_dict    dict with "entries" and optionally "requested_entries"
        Raises EntryNotInPlaylist if info_dict has no "entries"
        """
        self.ydl = ydl

        # _entries must be assigned now since infodict can change during iteration
        entries = info_dict.get('entries')
        if entries is None:
            raise EntryNotInPlaylist('There are no entries')
        elif isinstance(entries, list):
            self.is_exhausted = True

        requested_entries = info_dict.get('requested_entries')
        self.is_incomplete = requested_entries is not None
        if self.is_incomplete:
            assert self.is_exhausted
            # "entries" holds only the requested items. Put each of them at its
            # (1-based) playlist index and fill the gaps with MissingEntry
            self._entries = [self.MissingEntry] * max(requested_entries or [0])
            for i, entry in zip(requested_entries, entries):
                self._entries[i - 1] = entry
        elif isinstance(entries, (list, PagedList, LazyList)):
            self._entries = entries
        else:
            # Any other iterable is wrapped so that it can be indexed lazily
            self._entries = LazyList(entries)

    # A single segment of a playlist items specification: "START", "START:END", "START-END"
    # or "START:END:STEP", where every part of a range is optional
    PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
        (?P<start>[+-]?\d+)?
        (?P<range>[:-]
            (?P<end>[+-]?\d+|inf(?:inite)?)?
            (?::(?P<step>[+-]?\d+))?
        )?''')

    @classmethod
    def parse_playlist_items(cls, string):
        """Parse a comma separated playlist items specification

        Yields an int for each single index and a slice for each range.
        The stop of a slice is a float (or None), so that it can also be infinite.
        Raises ValueError on empty segments, invalid segments and zero steps.
        """
        for segment in string.split(','):
            if not segment:
                raise ValueError('There is two or more consecutive commas')
            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
            if not mobj:
                raise ValueError(f'{segment!r} is not a valid specification')
            start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
            if int_or_none(step) == 0:
                raise ValueError(f'Step in {segment!r} cannot be zero')
            yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)

    def get_requested_items(self):
        """Yield (playlist_index, entry) for the items selected through the params
        "playlist_items", "playliststart" and "playlistend" of self.ydl"""
        playlist_items = self.ydl.params.get('playlist_items')
        playlist_start = self.ydl.params.get('playliststart', 1)
        playlist_end = self.ydl.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlist_end in (-1, None):
            playlist_end = ''
        if not playlist_items:
            playlist_items = f'{playlist_start}:{playlist_end}'
        elif playlist_start != 1 or playlist_end:
            self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)

        for index in self.parse_playlist_items(playlist_items):
            for i, entry in self[index]:
                yield i, entry
                if not entry:
                    continue
                try:
                    # The item may have just been added to archive. Don't break due to it
                    if not self.ydl.params.get('lazy_playlist'):
                        # TODO: Add auto-generated fields
                        self.ydl._match_entry(entry, incomplete=True, silent=True)
                except (ExistingVideoReached, RejectedVideoReached):
                    # Stop the whole iteration, not just the current segment
                    return

    def get_full_count(self):
        """Return the total number of entries if it can be determined, else None"""
        if self.is_exhausted and not self.is_incomplete:
            return len(self)
        elif isinstance(self._entries, InAdvancePagedList):
            # With one entry per page, the page count equals the entry count
            if self._entries._pagesize == 1:
                return self._entries._pagecount

    @functools.cached_property
    def _getter(self):
        """Function returning the entry at a 0-based index

        The returned function raises self.IndexError for indices past the end of the
        entries. For lists it raises EntryNotInPlaylist if the entry is missing.
        """
        if isinstance(self._entries, list):
            def get_entry(i):
                try:
                    entry = self._entries[i]
                except IndexError:
                    entry = self.MissingEntry
                    # In an incomplete playlist, an index past the end is a
                    # missing entry and not the end of the playlist
                    if not self.is_incomplete:
                        raise self.IndexError
                if entry is self.MissingEntry:
                    raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
                return entry
        else:
            def get_entry(i):
                try:
                    # The lookup can trigger extraction, so it is wrapped in the exception handling of ydl
                    return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
                except (LazyList.IndexError, PagedList.IndexError):
                    raise self.IndexError
        return get_entry

    def __getitem__(self, idx):
        """Generator of (playlist_index, entry) for an int index or a slice

        Indices are 1-based, the stop of a slice is inclusive,
        and negative values count from the end (which requires evaluating len(self)).
        """
        if isinstance(idx, int):
            idx = slice(idx, idx)

        # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
        step = 1 if idx.step is None else idx.step
        if idx.start is None:
            start = 0 if step > 0 else len(self) - 1
        else:
            start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start

        # NB: Do not call len(self) when idx == [:]
        if idx.stop is None:
            stop = 0 if step < 0 else float('inf')
        else:
            stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
        # Move the stop one position further in the direction of iteration to make it inclusive
        stop += [-1, 1][step > 0]

        for i in frange(start, stop, step):
            if i < 0:
                continue
            try:
                entry = self._getter(i)
            except self.IndexError:
                self.is_exhausted = True
                # When moving forward, nothing can follow; when moving
                # backward, lower indices may still exist
                if step > 0:
                    break
                continue
            yield i + 1, entry

    def __len__(self):
        # Evaluates all the entries
        return len(tuple(self[:]))

    class IndexError(IndexError):  # noqa: A001
        pass


def uppercase_escape(s):
    r"""Replace every "\UXXXXXXXX" escape sequence in `s` with the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def unescape(mobj):
        # The decoder returns a (text, consumed length) tuple
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', unescape, s)


def lowercase_escape(s):
    r"""Replace every "\uXXXX" escape sequence in `s` with the character it denotes"""
    decode = codecs.getdecoder('unicode_escape')

    def unescape(mobj):
        # The decoder returns a (text, consumed length) tuple
        return decode(mobj.group(0))[0]

    return re.sub(r'\\u[0-9a-fA-F]{4}', unescape, s)


def parse_qs(url, **kwargs):
    """Parse the query string of `url`; `kwargs` are forwarded to urllib.parse.parse_qs"""
    query = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query, **kwargs)


def read_batch_urls(batch_fd):
    """Read the URLs from the lines of `batch_fd` and close it

    Lines may be str or bytes. Blank lines and lines starting with "#", ";" or "]"
    are skipped, and a trailing comment (whitespace followed by "#") is removed.
    """
    BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')

    def fixup(line):
        text = line if isinstance(line, str) else line.decode('utf-8', 'replace')
        for bom in BOM_UTF8:
            if text.startswith(bom):
                text = text[len(bom):]
        text = text.lstrip()
        if not text or text.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        without_comment = re.split(r'\s#', text, maxsplit=1)[0]
        return without_comment.rstrip()

    with contextlib.closing(batch_fd) as fd:
        urls = []
        for line in fd:
            url = fixup(line)
            if url:
                urls.append(url)
        return urls


def urlencode_postdata(*args, **kargs):
    """Same as urllib.parse.urlencode, but the result is returned as ASCII bytes"""
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')


def update_url(url, *, query_update=None, **kwargs):
    """Replace URL components specified by kwargs
       @param url           str or parse url tuple
       @param query_update  update query
       @returns             str
    """
    if isinstance(url, str):
        if not (kwargs or query_update):
            # Nothing to change; skip the parse/unparse round trip
            return url
        url = urllib.parse.urlparse(url)
    if query_update:
        assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
        existing_query = urllib.parse.parse_qs(url.query)
        merged_query = {**existing_query, **query_update}
        kwargs['query'] = urllib.parse.urlencode(merged_query, True)
    updated = url._replace(**kwargs)
    return urllib.parse.urlunparse(updated)


def update_url_query(url, query):
    """Return `url` with its query updated from `query`; shorthand for update_url(url, query_update=query)"""
    return update_url(url, query_update=query)


def _multipart_encode_impl(data, boundary):
    """Encode `data` as multipart/form-data using the given `boundary`

    @returns    (body as bytes, value for the Content-Type header)
    Raises ValueError if the boundary occurs within any of the encoded fields
    """
    content_type = f'multipart/form-data; boundary={boundary}'
    raw_boundary = boundary.encode('ascii')

    chunks = []
    for name, value in data.items():
        if isinstance(name, str):
            name = name.encode()
        if isinstance(value, str):
            value = value.encode()
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        field = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if raw_boundary in field:
            raise ValueError('Boundary overlaps with data')
        chunks += (b'--' + raw_boundary + b'\r\n', field)

    chunks.append(b'--' + raw_boundary + b'--\r\n')
    return b''.join(chunks), content_type


def multipart_encode(data, boundary=None):
    """
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Returns a tuple of the encoded body and the Content-Type header value.
    Raises ValueError if a specified boundary occurs within the data.

    Reference: https://tools.ietf.org/html/rfc7578
    """
    if boundary is not None:
        # A boundary chosen by the caller is never replaced; conflicts propagate
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # The random boundary collides with the data; try another one
            continue


def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Whether `x` is an instance of `allowed_types` but not of `blocked_types`

    By default, str, bytes and mappings are blocked.
    """
    if blocked_types is NO_DEFAULT:
        blocked_types = (str, bytes, collections.abc.Mapping)
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, blocked_types)


def variadic(x, allowed_types=NO_DEFAULT):
    """Return `x` if it is iterable-like, else wrap it in a 1-tuple

    Instances of `allowed_types` are treated as single values even if they are
    iterable. Passing anything other than a type or a tuple for it is deprecated.
    """
    if not isinstance(allowed_types, (tuple, type)):
        deprecation_warning('allowed_types should be a tuple or a type')
        allowed_types = tuple(allowed_types)
    if is_iterable_like(x, blocked_types=allowed_types):
        return x
    return (x, )


def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each of `funcs` with `args` and `kwargs` and return the first usable result

    A function is skipped if it raises AttributeError, KeyError, TypeError, IndexError,
    ValueError or ZeroDivisionError, or if `expected_type` is given and the
    result is not an instance of it. Returns None if no function succeeds.
    """
    for func in funcs:
        try:
            result = func(*args, **kwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
    return None


def try_get(src, getter, expected_type=None):
    """Apply `getter` (a function or several of them) to `src` via try_call and return the first usable result"""
    getters = variadic(getter)
    return try_call(*getters, args=(src,), expected_type=expected_type)


def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict with the items of `dct` for which cndn(key, value) is true

    By default, the items whose value is None are dropped.
    """
    kept = {}
    for key, value in dct.items():
        if cndn(key, value):
            kept[key] = value
    return kept


def merge_dicts(*dicts):
    """Merge `dicts`, giving precedence to the values of the earlier ones

    None values are ignored, and an empty string that was merged earlier
    is replaced by a later str value for the same key.
    """
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            if key not in merged or (isinstance(value, str) and merged[key] == ''):
                merged[key] = value
    return merged


def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as a str, decoding it with `encoding` and `errors` if it is not one already"""
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)


# Age limits by rating label; parse_age_limit looks up the upper-cased input here
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# Age limits by "TV-*" rating label; parse_age_limit also accepts
# the labels written as "TV_*" or without any separator
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}


def parse_age_limit(s):
    """Parse an age limit from an int, an "NN"/"NN+" string or a rating label

    ints must be within 0..21. Labels are matched case-insensitively against
    US_RATINGS and TV_PARENTAL_GUIDELINES. Returns None if nothing matches.
    """
    # isinstance(False, int) is True. So type() must be used instead
    if type(s) is int:  # noqa: E721
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, str):
        return None

    age_match = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if age_match:
        return int(age_match.group('age'))

    label = s.upper()
    if label in US_RATINGS:
        return US_RATINGS[label]

    # The separator after "TV" may be "-", "_" or missing
    tv_match = re.match(r'^TV[_-]?({})$'.format('|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)), label)
    if not tv_match:
        return None
    return TV_PARENTAL_GUIDELINES['TV-' + tv_match.group(1)]


def strip_jsonp(code):
    """Remove the JSONP wrapper ("callback(...);") from `code`, leaving the callback data

    An optional "window." prefix, a "name && name(...)" guard and
    trailing "//" comments are removed as well.
    """
    jsonp_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(jsonp_re, r'\g<callback_data>', code)


def js_to_json(code, vars={}, *, strict=False):
    """Convert JavaScript code describing a value (e.g. an object literal) into a JSON string

    @param code     the JavaScript source as str
    @param vars     dict of var, val pairs to substitute; vals are expected to be JSON
                    strings (in non-strict mode, a val that is not valid JSON is dumped as a JSON string)
    @param strict   if true, raise ValueError for unknown values instead of quoting them,
                    and skip the heuristics for "new Date(...)", other "new X(...)",
                    "parseInt(...)" and immediately invoked functions
    @returns        str; whether it is valid JSON depends on the input
    """
    STRING_QUOTES = '\'"`'
    STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    # (regex, base) for the integer literals that JSON does not support.
    # An optional trailing ":" means that the number is used as an object key
    INTEGER_TABLE = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
    )

    def process_escape(match):
        # Translate an unescaped double quote (group 1) or a backslash escape (group 2)
        # found inside a JS string into what is valid inside a JSON string
        JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
        escape = match.group(1) or match.group(2)

        return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
                else R'\u00' if escape == 'x'  # \xHH => \u00HH
                else '' if escape == '\n'  # escaped newline = line continuation
                else escape)

    def template_substitute(match):
        # Evaluate the expression of a "${...}" placeholder in a template string
        evaluated = js_to_json(match.group(1), vars, strict=strict)
        if evaluated[0] == '"':
            # Unwrap JSON strings so that their content is inserted without quotes
            return json.loads(evaluated)
        return evaluated

    def fix_kv(m):
        # Convert a single token matched by the regex at the end of this function
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith(('/*', '//', '!')) or v == ',':
            # Comments, "!" operators and trailing commas are dropped
            return ''

        if v[0] in STRING_QUOTES:
            # Strings of any quote style are re-quoted with double quotes
            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
            return f'"{escaped}"'

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # Object keys must be strings in JSON
                return f'"{i}":' if v.endswith(':') else str(i)

        if v in vars:
            try:
                if not strict:
                    json.loads(vars[v])
            except json.JSONDecodeError:
                return json.dumps(vars[v])
            else:
                return vars[v]

        if not strict:
            # Unknown identifiers (including unquoted object keys) are quoted as strings
            return f'"{v}"'

        raise ValueError(f'Unknown value: {v}')

    def create_map(mobj):
        # "new Map([[k, v], ...])" => JSON object built from the key/value pairs
        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))

    # Rewrite constructs that have a direct JSON equivalent before tokenizing
    code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
    if not strict:
        code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code)
        # Any other constructor call is kept as a string holding its source text
        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)

    # Alternatives, in order: strings, comments, trailing commas, "void 0",
    # identifiers, hex/octal integers, decimal integers used as keys, "!" operators
    return re.sub(rf'''(?sx)
        {STRING_RE}|
        {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
        [0-9]+(?={SKIP_RE}:)|
        !+
        ''', fix_kv, code)


def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    The returned function maps a quality id to its position in `quality_ids`,
    or to -1 if it is not contained """
    def q(qid):
        try:
            rank = quality_ids.index(qid)
        except ValueError:
            rank = -1
        return rank
    return q


# Names of the points in time at which postprocessing can happen
# NOTE(review): presumably the valid values for the "when" of a postprocessor, in order of execution - confirm against the callers
POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')


# Default output filename templates by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# The known output template types
# NOTE(review): the values look like the filename suffix used for files of that type (None = no fixed suffix) - confirm against the callers
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_video': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# Template for a regex matching a single printf-style conversion specifier
# that is not escaped, i.e. that is preceded by an even number of "%".
# The placeholder {0} stands for the regex of the mapping key
# and {1} for the regex of the conversion type
# (presumably filled in with str.format - verify against the callers)
#
# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
'''


# The conversion type characters of printf-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'


def limit_length(s, length):
    """ Add ellipses to overly long strings

    @param s        the str to shorten, or None
    @param length   maximum length of the result, including the ellipses
    @returns        `s` if it is short enough (or None); otherwise `s` cut off and
                    terminated with "..." so that the result is `length` characters long
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    if length <= len(ELLIPSES):
        # No room for any part of the text. A negative slice index
        # would make the result even longer than the input
        return ELLIPSES[:max(length, 0)]
    return s[:length - len(ELLIPSES)] + ELLIPSES


def version_tuple(v):
    """Split the version string `v` at "." and "-" into a tuple of ints

    Raises ValueError if any of the parts is not an integer.
    """
    parts = re.split(r'[-.]', v)
    return tuple(map(int, parts))


def is_outdated_version(version, limit, assume_new=True):
    """Whether `version` is older than `limit`

    If `version` is empty or one of the versions cannot be parsed, the
    result depends on `assume_new`: not outdated if it is true, outdated otherwise.
    """
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        outdated = not assume_new
    return outdated


def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """
    # Imported within the function rather than at module level
    from ..update import is_non_updateable

    non_updateable = is_non_updateable()
    return not non_updateable


def args_to_str(args):
    """Get a short string representation for a subprocess command"""
    quoted = shell_quote(args)
    return quoted


def error_to_str(err):
    """Format an exception as "<name of its type>: <message>" """
    type_name = type(err).__name__
    return f'{type_name}: {err}'


def mimetype2ext(mt, default=NO_DEFAULT):
    """Determine the file extension for the mimetype `mt`

    Parameters of the mimetype (anything after ";") are ignored. MAP is looked up
    with the full mimetype, its subtype and the part of the subtype after the last "+".
    If nothing is found, `default` is returned if it was given; otherwise
    the subtype with "+" replaced by "." is used.
    Returns `default` (or None if it was not given) if `mt` is not a str.
    """
    if not isinstance(mt, str):
        return None if default is NO_DEFAULT else default

    MAP = {
        # video
        '3gpp': '3gp',
        'mp2t': 'ts',
        'mp4': 'mp4',
        'mpeg': 'mpeg',
        'mpegurl': 'm3u8',
        'quicktime': 'mov',
        'webm': 'webm',
        'vp9': 'vp9',
        'video/ogg': 'ogv',
        'x-flv': 'flv',
        'x-m4v': 'm4v',
        'x-matroska': 'mkv',
        'x-mng': 'mng',
        'x-mp4-fragmented': 'mp4',
        'x-ms-asf': 'asf',
        'x-ms-wmv': 'wmv',
        'x-msvideo': 'avi',

        # application (streaming playlists)
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.apple.mpegurl': 'm3u8',
        'vnd.ms-sstr+xml': 'ism',
        'x-mpegurl': 'm3u8',

        # audio
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
        # Using .mp3 as it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/webm': 'webm',
        'audio/x-matroska': 'mka',
        'audio/x-mpegurl': 'm3u',
        'midi': 'mid',
        'ogg': 'ogg',
        'wav': 'wav',
        'wave': 'wav',
        'x-aac': 'aac',
        'x-flac': 'flac',
        'x-m4a': 'm4a',
        'x-realaudio': 'ra',
        'x-wav': 'wav',

        # image
        'avif': 'avif',
        'bmp': 'bmp',
        'gif': 'gif',
        'jpeg': 'jpg',
        'png': 'png',
        'svg+xml': 'svg',
        'tiff': 'tif',
        'vnd.wap.wbmp': 'wbmp',
        'webp': 'webp',
        'x-icon': 'ico',
        'x-jng': 'jng',
        'x-ms-bmp': 'bmp',

        # caption
        'filmstrip+json': 'fs',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-ms-sami': 'sami',

        # misc
        'gzip': 'gz',
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
    }

    mimetype = mt.partition(';')[0].strip().lower()
    subtype = mimetype.rpartition('/')[2]
    suffix = subtype.rsplit('+')[-1]

    # Keys are tried from the most to the least specific
    ext = traversal.traverse_obj(MAP, mimetype, subtype, suffix)
    if ext:
        return ext
    if default is NO_DEFAULT:
        return subtype.replace('+', '.')
    return default


def ext2mimetype(ext_or_url):
    """Guess the MIME type for a bare extension, a filename or a URL.

    A value that contains no dot is treated as a bare extension and is wrapped
    in a dummy filename before being handed to mimetypes.guess_type().

    @returns The guessed MIME type, or None for empty input / unknown types
    """
    if not ext_or_url:
        return None
    candidate = ext_or_url if '.' in ext_or_url else f'file.{ext_or_url}'
    mimetype, _encoding = mimetypes.guess_type(candidate)
    return mimetype


def parse_codecs(codecs_str):
    """Split a comma separated "codecs" string into video/audio/subtitle codecs.

    Only the first video, audio and subtitle codec found is kept.

    @returns  {} for empty input. Otherwise a dict with 'vcodec', 'acodec'
              ('none' when absent) and 'dynamic_range', plus 'scodec' when a
              subtitle codec was found. If nothing is recognised but exactly two
              codecs were given, they are returned as-is as vcodec and acodec.
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                    'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe')
    AUDIO_CODECS = ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
                    'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    SUBTITLE_CODECS = ('stpp', 'wvtt')

    split_codecs = [
        codec for codec in map(str.strip, codecs_str.strip().strip(',').split(','))
        if codec]

    vcodec = acodec = scodec = hdr = None
    for raw_codec in split_codecs:
        # Lowercase only the codec identifier, i.e. everything before the first dot
        full_codec = re.sub(r'^([^.]+)', lambda m: m.group(1).lower(), raw_codec)
        # Drop zeros that directly precede a digit, so that e.g. "av01" is matched as "av1"
        parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
        codec_id = parts[0]

        if codec_id in VIDEO_CODECS:
            if vcodec:
                continue
            vcodec = full_codec
            if codec_id in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif codec_id == 'av1' and traversal.traverse_obj(parts, 3) == '10':
                hdr = 'HDR10'
            elif parts[:2] == ['vp9', '2']:
                hdr = 'HDR10'
        elif codec_id in AUDIO_CODECS:
            acodec = acodec or full_codec
        elif codec_id in SUBTITLE_CODECS:
            scodec = scodec or full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')

    if not (vcodec or acodec or scodec):
        # Nothing recognised: fall back to positional "video, audio" if that fits
        if len(split_codecs) == 2:
            return {
                'vcodec': split_codecs[0],
                'acodec': split_codecs[1],
            }
        return {}

    result = {
        'vcodec': vcodec or 'none',
        'acodec': acodec or 'none',
        'dynamic_range': hdr,
    }
    if scodec is not None:
        result['scodec'] = scodec
    return result


def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Pick a container extension for merging the given video/audio streams.

    @param vcodecs, vexts  Codecs and extensions of the video streams (same length)
    @param acodecs, aexts  Codecs and extensions of the audio streams (same length)
    @param preferences     Optional ordered list of acceptable extensions
    @returns               The chosen extension. 'mkv' is the fallback whenever it is
                           allowed; otherwise the last entry of preferences is used
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    mkv_allowed = not preferences or 'mkv' in preferences

    # More than one stream of the same kind
    if mkv_allowed and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a', 'ac-4',  # fourcc (m3u8, mpd)
            'h264', 'aacl', 'ec-3',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    def sanitize_codec(codecs):
        # First codec only: keep the part before the first dot, drop zeros, lowercase
        return try_get(codecs, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())

    wanted_codecs = (sanitize_codec(vcodecs), sanitize_codec(acodecs))

    # First attempt: decide from the codecs
    for ext in preferences or COMPATIBLE_CODECS:
        if ext == 'mkv' or COMPATIBLE_CODECS.get(ext, set()).issuperset(wanted_codecs):
            return ext

    # Second attempt: decide from the extensions of the source streams
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm', 'weba'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv':
            return ext
        if current_exts == {ext}:
            return ext
        if any(family.issuperset(current_exts) for family in COMPATIBLE_EXTS):
            return ext

    if mkv_allowed:
        return 'mkv'
    return preferences[-1]


def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
    """Work out a file extension from the headers of a response.

    Sources are tried in this order: the filename of an "attachment"
    Content-Disposition header, the text after the last dot of the
    x-amz-meta-name header, and finally the Content-Type (via mimetype2ext,
    which also receives `default`).
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        ext = mobj and determine_ext(mobj.group('filename'), default_ext=None)
        if ext:
            return ext

    amz_meta_name = headers.get('x-amz-meta-name')
    if amz_meta_name:
        _, _, ext = amz_meta_name.rpartition('.')
        if ext:
            return ext

    return mimetype2ext(headers.get('Content-Type'), default=default)


def encode_data_uri(data, mime_type):
    """Build a base64 "data:" URI carrying `data` (bytes) with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return f'data:{mime_type};base64,{payload}'


def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """

    # Nothing is blocked when no limit is set (age_limit is None)
    # or when the content is available for everyone (content_limit is None)
    if age_limit is None or content_limit is None:
        return False
    return age_limit < content_limit


# List of known byte-order-marks (BOM), each paired with the encoding it signals.
# The UTF-32 marks come before the UTF-16 ones because the UTF-32-LE mark
# begins with the UTF-16-LE mark; a prefix check done in list order
# would otherwise pick the shorter one.
BOMS = [
    (codecs.BOM_UTF8, 'utf-8'),
    (codecs.BOM_UTF32_BE, 'utf-32-be'),
    (codecs.BOM_UTF32_LE, 'utf-32-le'),
    (codecs.BOM_UTF16_LE, 'utf-16-le'),
    (codecs.BOM_UTF16_BE, 'utf-16-be'),
]


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """

    detected_encoding = 'utf-8'
    for mark, mark_encoding in BOMS:
        mark_length = len(mark)
        # Strip every leading occurrence of the mark, not just the first one
        while first_bytes.startswith(mark):
            first_bytes = first_bytes[mark_length:]
            detected_encoding = mark_encoding

    text = first_bytes.decode(detected_encoding, 'replace')
    # HTML is assumed when the first non-whitespace character is "<"
    return re.match(r'^\s*<', text)


def determine_protocol(info_dict):
    """Return the download protocol of a format.

    An explicit 'protocol' entry wins. Otherwise it is derived from the
    sanitized URL: a known streaming prefix, then the extension
    (m3u8/f4m manifests), and finally the URL scheme.
    """
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = sanitize_url(info_dict['url'])
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext == 'm3u8':
        if info_dict.get('is_live'):
            return 'm3u8'
        return 'm3u8_native'
    if ext == 'f4m':
        return 'f4m'

    return urllib.parse.urlparse(url).scheme


def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a \t will be right aligned """
    def width(string):
        # Visible width: terminal sequences and the alignment tab do not count
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        return [max(width(str(value)) for value in column) for column in zip(*table)]

    def filter_using_list(row, filter_array):
        pairs = itertools.zip_longest(filter_array, row, fillvalue=True)
        return [cell for keep, cell in pairs if keep]

    # With hide_empty, columns whose data cells all have zero width are dropped
    column_filter = get_max_lens(data) if hide_empty else []
    header_row = filter_using_list(header_row, column_filter)
    data = [filter_using_list(row, column_filter) for row in data]

    max_lens = get_max_lens([header_row, *data])
    extra_gap += 1
    if delim:
        delim_row = [delim * (max_len + extra_gap) for max_len in max_lens]
        trim = extra_gap * len(delim)
        delim_row[-1] = delim_row[-1][:-trim]  # Remove extra_gap from end of delimiter
        table = [header_row, delim_row, *data]
    else:
        table = [header_row, *data]

    lines = []
    for row in table:
        rendered = []
        for pos, text in enumerate(map(str, row)):
            padding = max_lens[pos] - width(text)
            if '\t' in text:
                # The tab marks where the padding goes, right aligning what follows it
                rendered.append(text.replace('\t', ' ' * padding) + ' ' * extra_gap)
            else:
                rendered.append(text + ' ' * (padding + extra_gap))
        lines.append(''.join(rendered).rstrip())
    return '\n'.join(lines)


def _match_one(filter_part, dct, incomplete):
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    if isinstance(incomplete, bool):
        is_incomplete = lambda _: incomplete
    else:
        is_incomplete = lambda k: k in incomplete

    operator_rex = re.compile(r'''(?x)
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>{})(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        '''.format('|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))))
    m = operator_rex.fullmatch(filter_part.strip())
    if m:
        m = m.groupdict()
        unnegated_op = COMPARISON_OPERATORS[m['op']]
        if m['negation']:
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
        if m['quote']:
            comparison_value = comparison_value.replace(r'\{}'.format(m['quote']), m['quote'])
        actual_value = dct.get(m['key'])
        numeric_comparison = None
        if isinstance(actual_value, (int, float)):
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082)
            try:
                numeric_comparison = int(comparison_value)
            except ValueError:
                numeric_comparison = parse_filesize(comparison_value)
                if numeric_comparison is None:
                    numeric_comparison = parse_filesize(f'{comparison_value}B')
                if numeric_comparison is None:
                    numeric_comparison = parse_duration(comparison_value)
        if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
            raise ValueError('Operator {} only supports string values!'.format(m['op']))
        if actual_value is None:
            return is_incomplete(m['key']) or m['none_inclusive']
        return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)
        (?P<op>{})\s*(?P<key>[a-z_]+)
        '''.format('|'.join(map(re.escape, UNARY_OPERATORS.keys()))))
    m = operator_rex.fullmatch(filter_part.strip())
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if is_incomplete(m.group('key')) and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError(f'Invalid filter part {filter_part!r}')


def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.
    The string is split into conditions on every "&" that is not escaped as "\\&",
    and all of the conditions have to pass.
    @returns           Whether the filter passes
    @param incomplete  Set of keys that is expected to be missing from dct.
                       Can be True/False to indicate all/none of the keys may be missing.
                       All conditions on incomplete keys pass if the key is missing
    """
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        condition = filter_part.replace(r'\&', '&')
        if not _match_one(condition, dct, incomplete):
            return False
    return True


def match_filter_func(filters, breaking_filters=None):
    """Build a match-filter callable out of filter strings.

    @param filters           One or more filter strings; an entry passes if any of them
                             matches. The special entry "-" marks the filter as interactive
    @param breaking_filters  Filters which raise RejectedVideoReached when they reject an entry
    @returns                 None if no filters were given at all. Otherwise a function
                             (info_dict, incomplete=False) returning None if the entry passes
                             (NO_DEFAULT if interactive and the info is complete), or a
                             message explaining why the entry is skipped
    """
    if not (filters or breaking_filters):
        return None
    repr_ = f'{match_filter_func.__module__}.{match_filter_func.__qualname__}({filters}, {breaking_filters})'

    check_breaking = match_filter_func(breaking_filters) or (lambda _, __: None)
    filter_set = set(variadic(filters or []))

    interactive = '-' in filter_set
    filter_set.discard('-')

    @function_with_repr.set_repr(repr_)
    def _match_func(info_dict, incomplete=False):
        rejection = check_breaking(info_dict, incomplete)
        if rejection is not None:
            raise RejectedVideoReached(rejection)

        if filter_set and not any(match_str(f, info_dict, incomplete) for f in filter_set):
            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
            filter_str = ') | ('.join(map(str.strip, filter_set))
            return f'{video_title} does not pass filter ({filter_str}), skipping ..'

        if interactive and not incomplete:
            return NO_DEFAULT
        return None
    return _match_func


class download_range_func:
    """Callable that yields the sections of a video that should be downloaded.

    @param chapters   Regexes; every chapter whose title matches one of them is a section
    @param ranges     (start, end) pairs in seconds. Negative values are counted from
                      the end of the video when its duration is known
    @param from_info  Whether to also use start_time/end_time of the info dict as a section
    """

    def __init__(self, chapters, ranges, from_info=False):
        self.chapters, self.ranges, self.from_info = chapters, ranges, from_info

    def __call__(self, info_dict, ydl):
        """Yield one dict per section; an empty dict stands for "no restriction"."""

        warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
                   else 'Cannot match chapters since chapter information is unavailable')
        for regex in self.chapters or []:
            for i, chapter in enumerate(info_dict.get('chapters') or []):
                if re.search(regex, chapter['title']):
                    warning = None
                    yield {**chapter, 'index': i}
        # Only report when chapters were requested and not a single one matched
        if self.chapters and warning:
            ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

        for start, end in self.ranges or []:
            yield {
                'start_time': self._handle_negative_timestamp(start, info_dict),
                'end_time': self._handle_negative_timestamp(end, info_dict),
            }

        if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
            yield {
                'start_time': info_dict.get('start_time') or 0,
                'end_time': info_dict.get('end_time') or float('inf'),
            }
        elif not self.ranges and not self.chapters:
            yield {}

    @staticmethod
    def _handle_negative_timestamp(time, info):
        # A negative time is an offset from the end of the video (clamped at 0).
        # It is passed through unchanged when the duration is not known
        return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time

    def __eq__(self, other):
        # from_info changes what __call__ yields, so it has to take part in equality
        return (isinstance(other, download_range_func)
                and self.chapters == other.chapters and self.ranges == other.ranges
                and self.from_info == other.from_info)

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.chapters}, {self.ranges}, {self.from_info})'


def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds.

    Accepted are a plain number with an optional "s" suffix, and clock times of the
    form H:MM:SS with an optional fraction that is introduced by "." or ":".

    @returns The time in seconds as a number, or None if the expression is empty or not understood
    """
    if not time_expr:
        return None

    offset_mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if offset_mobj is not None:
        return float(offset_mobj.group('time_offset'))

    clock_mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_mobj is None:
        return None
    hours, minutes, seconds = clock_mobj.groups()
    # A ":" in front of the last field is read as a decimal point
    return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))


def srt_subtitles_timecode(seconds):
    """Format a time given in seconds as an SRT timecode ("%02d:%02d:%02d,%03d")."""
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timetuple


def ass_subtitles_timecode(seconds):
    """Format a time given in seconds as an ASS timecode ("%01d:%02d:%02d.%02d")."""
    timetuple = timetuple_from_msec(seconds * 1000)
    # The last field is written as hundredths of a second instead of milliseconds
    *leading_fields, _ = timetuple
    centiseconds = timetuple.milliseconds / 10
    return '%01d:%02d:%02d.%02d' % (*leading_fields, centiseconds)


def dfxp2srt(dfxp_data):
    """
    Convert DFXP/TTML subtitles to SRT.

    Legacy TTML namespaces are rewritten to the current ones before parsing. The styling
    attributes listed in SUPPORTED_STYLING are rendered as <b>, <i>, <u> and <font> tags.
    Every <p> that has a begin time and either an end time or a duration becomes one cue.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    @raises ValueError if the document does not contain any <p> element
    """
    # (current namespace, [legacy namespaces that are replaced by it])
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # The tts:* attributes that are taken into account
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration',
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # style id -> resolved properties (including inherited ones)
    default_style = {}  # properties of the styles referenced by <body>/<div>

    class TTMLPElementParser:
        # Target object for XMLParser: renders a single <p> element as SRT text.
        #
        # NOTE: these are class attributes. `_out` is rebound on the instance by "+=", but
        # the two lists are only ever mutated in place, so they are shared by all parser
        # instances created during this call. A style that start() pushes without opening
        # any tag is not popped again by end() and hence stays visible to the following
        # paragraphs. Making this state per-instance would change the generated output
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Precedence (lowest to highest): default style, referenced style, inline attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Do not repeat what the most recently applied style already provides
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += f' color="{v}"'
                        elif k == 'fontSize':
                            font += f' size="{v}"'
                        elif k == 'fontFamily':
                            font += f' face="{v}"'
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += f'</{element}>'
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
    # This will not trigger false positives since only UTF-8 text is being replaced
    dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')

    def parse_node(node):
        # Serialize the element again and replay it through the tag-emitting parser above
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance. A style can only be resolved once its parent is registered,
    # so the styles are processed repeatedly until none of them had to be skipped.
    previously_unresolved = None
    while True:
        missing_parents = []
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    missing_parents.append(parent_style_id)
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if not missing_parents:
            break
        if len(missing_parents) == previously_unresolved:
            # A whole pass has not resolved anything new, so the remaining parents will
            # never show up by themselves: they are undefined, define none of the supported
            # properties, or are part of a reference cycle. Treat them as empty styles
            # instead of looping forever; the next pass can then resolve every style
            for parent_style_id in missing_parents:
                styles.setdefault(parent_style_id, {})
        previously_unresolved = len(missing_parents)

    # Styles referenced by the first <body> and <div> apply to all the elements
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # The index also advances for the paragraphs that get skipped below
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)


def cli_option(params, command_option, param, separator=None):
    """Build the command line arguments for an option that takes a value.

    @returns [] if params[param] is missing or None,
             [command_option, value] if no separator is given,
             or a single "<command_option><separator><value>" argument otherwise
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']


def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build the command line arguments for an option that takes a boolean value.

    params[param] must be True, False or None/missing. The boolean is mapped to
    true_value/false_value and then formatted by cli_option, so None results in [].
    """
    flag = params.get(param)
    assert flag in (True, False, None)
    value_by_flag = {True: true_value, False: false_value}
    return cli_option(value_by_flag, command_option, flag, separator)


def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] if params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []


def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Look up extra command line arguments in `argdict`.

    @param argdict     Mapping of lowercase key -> list of arguments. A plain list/tuple
                       is accepted for backward compatibility and is returned as-is
                       when use_compat is set (otherwise it is ignored)
    @param keys        List/tuple of keys, tried in order. An entry may itself contain
                       several keys; the arguments of all of those present are concatenated
    @param default     Returned if nothing matches
    @returns           The arguments of the first entry of `keys` that matches, else `default`
    """
    # NOTE: `default` (and a list/tuple `argdict`) is returned without being copied,
    # so mutating the result in place also changes the object that was passed in
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        return default
    if argdict is None:
        return default
    assert isinstance(argdict, dict)
    assert isinstance(keys, (list, tuple))

    for key_list in keys:
        found = [
            args for args in (argdict.get(key.lower()) for key in variadic(key_list))
            if args is not None]
        if found:
            return list(itertools.chain.from_iterable(found))
    return default


def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Build the lookup keys for `main_key` and `exe`, and fetch the arguments with
    cli_configuration_args.

    The root key is "<exe>" if it equals main_key and "<main_key>+<exe>" otherwise, and each
    entry of `keys` is a suffix that is appended to it. Only when the bare root key is among
    the resulting keys are the (main_key, exe) pair and 'default' added as fallbacks;
    in all other cases the backward compatible handling of list arguments is turned off.
    """
    main_key = main_key.lower()
    exe = exe.lower()
    is_same = main_key == exe
    root_key = exe if is_same else f'{main_key}+{exe}'

    lookup_keys = [f'{root_key}{suffix}' for suffix in (keys or [''])]
    if root_key not in lookup_keys:
        use_compat = False
    else:
        if not is_same:
            lookup_keys.append((main_key, exe))
        lookup_keys.append('default')
    return cli_configuration_args(argdict, lookup_keys, default, use_compat)


class ISO639Utils:
    """Conversion between two-letter and three-letter language codes"""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pe': 'per',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are looked up, so "en-US" resolves like "en"
        two_letter_code = code[:2]
        return cls._lang_map.get(two_letter_code)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # The first matching entry wins, i.e. the current code
        # is preferred over the replaced ones that are listed after it
        matches = (
            short_name for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)


class ISO3166Utils:
    """Lookup of country names by their two-letter country code"""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
        # Not ISO 3166 codes, but used for IP blocks
        'AP': 'Asia/Pacific Region',
        'EU': 'Europe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter country code (the keys above, e.g. 'US') to the
        corresponding full name. The lookup is case-insensitive; unknown codes give None"""
        normalized_code = code.upper()
        return cls._country_map.get(normalized_code)


class GeoUtils:
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return str(socket.inet_ntoa(
            struct.pack('!L', random.randint(addr_min, addr_max))))


# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
# released into Public Domain
# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387

def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to its shortest big-endian byte string.

    Zero and negative values yield a single zero byte. If optional blocksize
    is given and greater than zero, the front of the byte string is padded
    with binary zeros so that the length is a multiple of blocksize.
    """
    n = int(n)
    if n > 0:
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    else:
        s = b'\000'
    if blocksize > 0:
        remainder = len(s) % blocksize
        if remainder:
            s = b'\000' * (blocksize - remainder) + s
    return s


def bytes_to_long(s):
    """bytes_to_long(string) : long
    Interpret a byte string as a big-endian unsigned integer.

    This is (essentially) the inverse of long_to_bytes(); an empty string
    gives 0 and leading zero bytes do not change the result.
    """
    return int.from_bytes(s, 'big')


def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Encrypt a single block with OHDave's RSA scheme. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object (read in reversed byte order)
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: lowercase hex string of the encrypted data, without prefix

    Limitation: supports one block encryption only
    """
    reversed_hex = binascii.hexlify(data[::-1])
    plaintext = int(reversed_hex, 16)
    ciphertext = pow(plaintext, exponent, modulus)
    return format(ciphertext, 'x')


def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme (block type 2)

    The result is laid out as [0, 2, <padding>, 0, *data]. The padding bytes
    are random and never zero, since the first zero byte after the block type
    marks where the data starts.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data does not leave room for 11 bytes of overhead
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # Nonzero only: a zero here would be read as the end-of-padding separator
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2, *pseudo_random, 0, *data]


def _base_n_table(n, table):
    if not table and not n:
        raise ValueError('Either table or n must be specified')
    table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]

    if n and n != len(table):
        raise ValueError(f'base {n} exceeds table length {len(table)}')
    return table


def encode_base_n(num, n=None, table=None):
    """Convert given non-negative int to a base-n string

    @param num    Integer to encode; a falsy value gives the first table symbol
    @param n      Base; defaults to the length of the table
    @param table  Digit alphabet, validated by _base_n_table()
    @raises ValueError  for a negative num or an alphabet shorter than 2 symbols,
                        as the digit loop would never terminate for either
    """
    table = _base_n_table(n, table)
    if not num:
        return table[0]

    base = len(table)
    if num < 0:
        raise ValueError(f'cannot encode negative number {num}')
    if base < 2:
        raise ValueError(f'cannot encode non-zero number in base {base}')

    digits = []
    while num:
        num, remainder = divmod(num, base)
        digits.append(table[remainder])
    # Digits were collected least significant first
    return ''.join(reversed(digits))


def decode_base_n(string, n=None, table=None):
    """Convert given base-n string to int

    Characters missing from the digit alphabet raise KeyError.
    """
    alphabet = _base_n_table(n, table)
    digit_values = dict(zip(alphabet, range(len(alphabet))))
    base = len(digit_values)
    return functools.reduce(
        lambda total, char: total * base + digit_values[char], string, 0)


def decode_packed_codes(code):
    """Expand code matched by PACKED_CODES_RE back into its unpacked form

    Every word in the obfuscated code is replaced by the symbol stored under
    the word's base-n index; an empty symbol stands for the word itself.
    """
    packed = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = packed.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    symbol_table = {}
    for index in range(count - 1, -1, -1):
        encoded_index = encode_base_n(index, base)
        symbol_table[encoded_index] = symbols[index] or encoded_index

    def _lookup(word):
        return symbol_table[word.group(0)]

    return re.sub(r'\b(\w+)\b', _lookup, obfuscated_code)


def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by `shift` positions

    The shift wraps around the alphabet; other characters are kept unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def _rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(_rotate, s))


def rot47(s):
    """Apply a caesar shift of 47 over the 94 printable ASCII characters '!' to '~'"""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)


def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list (KEY=value,KEY="quoted value",...) into a dict

    Surrounding double quotes are removed from quoted values.
    """
    attribute_pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        name: value[1:-1] if value.startswith('"') else value
        for name, value in attribute_pairs
    }


def urshift(val, n):
    """Right shift val by n bits, treating a negative val as unsigned 32 bit"""
    if val < 0:
        val += 0x100000000
    return val >> n


def write_xattr(path, key, value):
    """Write the extended attribute `key` with content `value` to the file at `path`

    @param path   File to annotate
    @param key    Attribute name
    @param value  Attribute content as bytes (written in binary mode on Windows
                  and decoded to str before being passed to an executable)
    @raises XAttrMetadataError     if writing the attribute fails
    @raises XAttrUnavailableError  if no module or executable is available to write it
    """
    # Windows: Write xattrs to NTFS Alternate Data Streams:
    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
    if compat_os_name == 'nt':
        assert ':' not in key
        assert os.path.exists(path)

        try:
            # The stream is addressed as "<path>:<key>"
            with open(f'{path}:{key}', 'wb') as f:
                f.write(value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules

    setxattr = None
    if callable(getattr(os, 'setxattr', None)):
        setxattr = os.setxattr
    elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        if version_tuple(xattr.__version__) >= (0, 5, 0):
            setxattr = xattr.set
    elif xattr:
        setxattr = xattr.setxattr

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    exe = ('setfattr' if check_executable('setfattr', ['--version'])
           else 'xattr' if check_executable('xattr', ['-h']) else None)
    if not exe:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the "xattr" or "pyxattr" Python modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    # The executables take the value as a command line argument
    value = value.decode()
    try:
        _, stderr, returncode = Popen.run(
            [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    if returncode:
        raise XAttrMetadataError(returncode, stderr)


def random_birthday(year_field, month_field, day_field):
    """Pick a random date between 1950-01-01 and 1995-12-31 (both inclusive)

    Returns a dict mapping the three given field names to the year, month and
    day of that date, each as a string without zero padding.
    """
    earliest, latest = dt.date(1950, 1, 1), dt.date(1995, 12, 31)
    span_days = (latest - earliest).days
    birthday = earliest + dt.timedelta(days=random.randint(0, span_days))
    field_names = (year_field, month_field, day_field)
    date_parts = (birthday.year, birthday.month, birthday.day)
    return dict(zip(field_names, map(str, date_parts)))


def find_available_port(interface=''):
    """Return a port number the OS assigned on `interface`, or None on socket errors

    The probing socket is closed again before returning.
    """
    with contextlib.suppress(OSError):
        with socket.socket() as sock:
            # Port 0 lets the OS choose the port
            sock.bind((interface, 0))
            return sock.getsockname()[1]
    return None


# Templates for internet shortcut files, which are plain text files.
# They are %-format strings; the placeholders used are %(url)s and, for the
# desktop entry only, %(filename)s.

# ".url" shortcut (INI-style [InternetShortcut] section)
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# ".webloc" shortcut (Apple property list XML)
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# ".desktop" shortcut ([Desktop Entry] of Type=Link)
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Maps the link type name to its template
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}


def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.

    The hostname is converted with the 'idna' codec. An explicit port is kept, except for port 80 on `http` URLs, where it is the default. IRIs without a hostname (e.g. relative references) are supported.

    @raises ValueError  for IPv6 hosts, which are not supported
    """

    iri_parts = urllib.parse.urlparse(iri)

    # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
    if '[' in iri_parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')

    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.

    net_location = ''
    if iri_parts.username:
        net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
        if iri_parts.password is not None:
            net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
        net_location += '@'

    # `hostname` is None when the IRI has no host part
    if iri_parts.hostname is not None:
        # Punycode for Unicode hostnames. The 'idna' encoding produces ASCII text.
        net_location += iri_parts.hostname.encode('idna').decode()

    port = iri_parts.port
    # Port 80 is only redundant for http; dropping it elsewhere would change the target
    is_default_port = port == 80 and iri_parts.scheme == 'http'
    if port is not None and not is_default_port:
        net_location += f':{port}'

    return urllib.parse.urlunparse(
        (iri_parts.scheme,
            net_location,

            urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),

            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
            urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),

            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
            urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),

            urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))


def to_high_limit_path(path):
    r"""Return path as an absolute path prefixed with \\?\ on win32/cygwin, else unchanged"""
    if sys.platform not in ['win32', 'cygwin']:
        return path

    # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
    return '\\\\?\\' + os.path.abspath(path)


def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
    """Format the value found at `field` in `obj` with `template`

    `default` is returned instead when the value is falsy, or, if `ignore` is
    given, when the value is one of the `ignore` values. Otherwise the value
    is passed through `func` and interpolated into `template`.
    """
    val = traversal.traverse_obj(obj, *variadic(field))
    if ignore is NO_DEFAULT:
        skip = not val
    else:
        skip = val in variadic(ignore)
    return default if skip else template % func(val)


def clean_podcast_url(url):
    """Remove known tracking/analytics redirect prefixes from a podcast URL

    If the removal leaves two schemes in a row, only the second one is kept.
    """
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com|
                chrt\.fm/track|
                mgln\.ai/e
            )(?:/[^/.]+)?|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e|
            [0-9]\.gum\.fm|
            pscrb\.fm/rss/p
        )/'''
    without_trackers = re.sub(tracking_prefix_re, '', url)
    return re.sub(r'^\w+://(\w+://)', r'\1', without_trackers)


_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Return a random version 4 UUID as a lowercase, hyphenated string

    The version digit is fixed to 4 and the first digit of the fourth group
    is limited to 8, 9, a or b, which encodes the variant bits `10`.
    """
    def _random_digit(placeholder):
        # 'y' marks the variant digit, 'x' is a free hex digit
        table = '89ab' if placeholder.group() == 'y' else _HEX_TABLE
        return random.choice(table)

    return re.sub(r'[xy]', _random_digit, 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')


def make_dir(path, to_screen=None):
    """Create the parent directory of `path` (including intermediate ones) if needed

    @param path       File path whose directory should exist
    @param to_screen  Optional callable that receives an error message on failure
    @returns          True on success (also when path has no directory part),
                      False if the directory could not be created
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # Reporting is optional; with no callback the failure is only signalled by the return value
        if callable(to_screen):
            to_screen(f'unable to create directory {err}')
        return False


def get_executable_path():
    """Return the absolute directory of the executable path reported by the update module"""
    from ..update import _get_variant_and_executable_path

    executable = _get_variant_and_executable_path()[1]
    absolute_executable = os.path.abspath(executable)
    return os.path.dirname(absolute_executable)


def get_user_config_dirs(package_name):
    """Yield the candidate per-user config directories for package_name

    Order: the XDG config directory, the %APPDATA% directory (only if that
    variable is set) and finally the dot-directory in the user's home.
    """
    # .config (e.g. ~/.config/package_name)
    xdg_config_home = os.getenv('XDG_CONFIG_HOME')
    if not xdg_config_home:
        xdg_config_home = compat_expanduser('~/.config')
    yield os.path.join(xdg_config_home, package_name)

    # appdata (%APPDATA%/package_name)
    appdata_dir = os.getenv('appdata')
    if appdata_dir:
        yield os.path.join(appdata_dir, package_name)

    # home (~/.package_name)
    home_dir = compat_expanduser('~')
    yield os.path.join(home_dir, f'.{package_name}')


def get_system_config_dirs(package_name):
    """Yield the system-wide config directories for package_name (only /etc/package_name)"""
    system_dirs = (os.path.join('/etc', package_name),)
    yield from system_dirs


def time_seconds(**kwargs):
    """
    Returns the current time in seconds since the epoch (1970-01-01T00:00:00Z),
    shifted by the offset that the keyword arguments describe.

    The keyword arguments are passed to datetime.timedelta, e.g. hours=9.
    """
    now = time.time()
    offset = dt.timedelta(**kwargs)
    return now + offset.total_seconds()


# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create a three-part, dot-separated token signed with HMAC-SHA256

    @param payload_data  JSON-serializable payload
    @param key           Signing key as str
    @param headers       Optional dict of extra header fields; they override
                         the default 'alg' and 'typ' entries
    @returns             bytes of the form b'<header>.<payload>.<signature>'

    NOTE: each part is encoded with standard base64 including padding, whereas
    RFC 7515 specifies unpadded base64url. The output format is kept as is,
    since callers may depend on it.
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode())
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
    # The signature covers the encoded header and payload, joined by a dot
    signing_input = header_b64 + b'.' + payload_b64
    h = hmac.new(key.encode(), signing_input, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    return signing_input + b'.' + signature_b64


# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Return the decoded JSON payload of a three-part JWT string

    The header and the signature are not inspected or verified.
    """
    _, payload_b64, _ = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    padded_payload = f'{payload_b64}==='
    payload_json = base64.urlsafe_b64decode(padded_payload)
    return json.loads(payload_json)


# Whether VT mode has been enabled for the Windows console: starts as False
# when compat_os_name is 'nt' and is set to True by windows_enable_vt_mode();
# None on all other systems
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None


@functools.cache
def supports_terminal_sequences(stream):
    """Return whether terminal escape sequences can be written to stream

    Requires WINDOWS_VT_MODE to be truthy on 'nt', or a non-empty TERM
    environment variable elsewhere, and the stream to be a TTY. Any error
    raised by stream.isatty() counts as "not supported". Results are cached
    per stream.
    """
    if compat_os_name == 'nt':
        terminal_capable = bool(WINDOWS_VT_MODE)
    else:
        terminal_capable = bool(os.getenv('TERM'))
    if not terminal_capable:
        return False

    try:
        return stream.isatty()
    except BaseException:
        return False


def windows_enable_vt_mode():
    """Enable virtual terminal processing for the Windows console

    Does nothing if get_windows_version() reports less than (10, 0, 10586).
    On success, WINDOWS_VT_MODE is set to True and the cache of
    supports_terminal_sequences() is cleared. Raises Exception if the console
    mode cannot be read or changed.

    Ref: https://bugs.python.org/issue30075
    """
    if get_windows_version() < (10, 0, 10586):
        return

    # Imported here rather than at module level
    import ctypes
    import ctypes.wintypes
    import msvcrt

    ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004

    dll = ctypes.WinDLL('kernel32', use_last_error=False)
    handle = os.open('CONOUT$', os.O_RDWR)
    try:
        h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
        dw_original_mode = ctypes.wintypes.DWORD()
        success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode))
        if not success:
            raise Exception('GetConsoleMode failed')

        # Add the VT flag on top of the mode bits that are already set
        success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD(
            dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING))
        if not success:
            raise Exception('SetConsoleMode failed')
    finally:
        os.close(handle)

    global WINDOWS_VT_MODE
    WINDOWS_VT_MODE = True
    # Results cached while VT mode was off are stale now
    supports_terminal_sequences.cache_clear()


# Matches an escape sequence of the form ESC [ ... m
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return string with all ESC [ ... m escape sequences removed"""
    return re.sub(_terminal_sequences_re, '', string)


def number_of_digits(number):
    """Return the length of number formatted with '%d' (a minus sign is counted)"""
    formatted = '%d' % number
    return len(formatted)


def join_nonempty(*values, delim='-', from_dict=None):
    """Join the str() of all truthy values with delim

    If from_dict is given, the values are treated as keys/paths and are
    looked up in from_dict first.
    """
    if from_dict is not None:
        values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
    return delim.join(str(value) for value in values if value)


def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Rewrite thumbnails to the dimensions of the largest format.

    The largest format is the one with the greatest (width, height) pair. For
    each thumbnail:
    * The part of the URL matched by url_width_re is replaced with that width
    * 'width' and 'height' are set to the format's dimensions

    If no format has a width, thumbnails is returned unchanged.

    This function is useful with video services that scale the provided thumbnails on demand
    """
    dimension_keys = ('width', 'height')
    format_dimensions = [
        tuple(fmt.get(key) or 0 for key in dimension_keys)
        for fmt in formats
    ]
    max_width, max_height = max(format_dimensions, default=(0, 0))
    if not max_width:
        return thumbnails

    scaled = []
    for thumbnail in thumbnails:
        scaled_url = re.sub(url_width_re, str(max_width), thumbnail['url'])
        scaled.append(merge_dicts(
            {'url': scaled_url},
            {'width': max_width, 'height': max_height},
            thumbnail))
    return scaled


def parse_http_range(range):
    """ Parse value of "Range" or "Content-Range" HTTP header into a (start, end, total) tuple.

    Parts that are absent are None; all three are None if the value is empty
    or does not contain a bytes range.
    """
    nothing = (None, None, None)
    if not range:
        return nothing
    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if crg is None:
        return nothing
    start, end, total = crg.groups()
    return int(start), int_or_none(end), int_or_none(total)


def read_stdin(what):
    """Return sys.stdin, first printing a hint on how to end the input if `what` is given"""
    if not what:
        return sys.stdin

    eof = 'Ctrl+D'
    if compat_os_name == 'nt':
        eof = 'Ctrl+Z'
    write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
    return sys.stdin


def determine_file_encoding(data):
    """
    Detect the text encoding of the given bytes

    A byte order mark from BOMS wins; otherwise a "# coding: <name>"
    declaration at the very start of the data is used.

    @returns (encoding, bytes to skip), where encoding is None if undetected
    """

    # BOM marks are given priority over declarations
    bom_match = next(
        ((enc, len(bom)) for bom, enc in BOMS if data.startswith(bom)), None)
    if bom_match is not None:
        return bom_match

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    stripped = data.replace(b'\0', b'')
    declaration = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
    if declaration is None:
        return None, 0
    return declaration.group(1).decode(), 0


class Config:
    """A list of command line arguments together with the configs it pulls in

    Each instance holds its own arguments (`own_args`) and the child configs
    (`configs`) loaded from the `config_locations` option found in them.
    """
    own_args = None
    parsed_args = None
    filename = None
    __initialized = False

    def __init__(self, parser, label=None):
        self.parser, self.label = parser, label
        # _loaded_paths is shared with child configs, so each location is loaded once
        self._loaded_paths, self.configs = set(), []

    def init(self, args=None, filename=None):
        """Set the arguments and file name of this config and load nested configs

        May only be called once per instance. Returns what load_configs() returns.
        """
        assert not self.__initialized
        self.own_args, self.filename = args, filename
        return self.load_configs()

    def load_configs(self):
        """Parse own_args and load every config listed in `config_locations`

        @returns  False if this config's file was already loaded, else True
        """
        directory = ''
        if self.filename:
            location = os.path.realpath(self.filename)
            directory = os.path.dirname(location)
            if location in self._loaded_paths:
                return False
            self._loaded_paths.add(location)

        self.__initialized = True
        opts, _ = self.parser.parse_known_args(self.own_args)
        self.parsed_args = self.own_args
        for location in opts.config_locations or []:
            if location == '-':
                if location in self._loaded_paths:
                    continue
                self._loaded_paths.add(location)
                self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
                continue
            # Locations are resolved relative to the directory of this config's file
            location = os.path.join(directory, expand_path(location))
            if os.path.isdir(location):
                location = os.path.join(location, 'yt-dlp.conf')
            if not os.path.exists(location):
                self.parser.error(f'config location {location} does not exist')
            self.append_config(self.read_file(location), location)
        return True

    def __str__(self):
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')
        return join_nonempty(
            self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
            *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
            delim='\n')

    @staticmethod
    def read_file(filename, default=[]):
        """Read a config file and split its contents into a list of arguments

        @param filename  Path of the config file
        @param default   Returned as is when the file cannot be opened
        @raises ValueError  if the contents cannot be decoded or split
        """
        try:
            optionf = open(filename, 'rb')
        except OSError:
            return default  # silently skip if file is not present
        # The context manager closes the file on every exit path
        with optionf:
            try:
                enc, skip = determine_file_encoding(optionf.read(512))
                optionf.seek(skip, io.SEEK_SET)
            except OSError:
                enc = None  # silently skip read errors
            try:
                # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
                contents = optionf.read().decode(enc or preferredencoding())
                return shlex.split(contents, comments=True)
            except Exception as err:
                raise ValueError(f'Unable to parse "{filename}": {err}') from err

    @staticmethod
    def hide_login_info(opts):
        """Return a copy of opts with the values of credential options replaced by 'PRIVATE'

        Both the "--option=value" and the "--option value" forms are handled.
        """
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

        def _scrub_eq(o):
            m = eqre.match(o)
            if m:
                return m.group('key') + '=PRIVATE'
            else:
                return o

        opts = list(map(_scrub_eq, opts))
        for idx, opt in enumerate(opts):
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts

    def append_config(self, *args, label=None):
        """Create a child config from args (see init) and keep it if it was loaded"""
        config = type(self)(self.parser, label)
        config._loaded_paths = self._loaded_paths
        if config.init(*args):
            self.configs.append(config)

    @property
    def all_args(self):
        """Arguments of all child configs (last one first), followed by this config's own"""
        for config in reversed(self.configs):
            yield from config.all_args
        yield from self.parsed_args or []

    def parse_known_args(self, **kwargs):
        return self.parser.parse_known_args(self.all_args, **kwargs)

    def parse_args(self):
        return self.parser.parse_args(self.all_args)


def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones

    The keys of the result are title-cased.
    """
    merged = {}
    for headers in dicts:
        for name, value in dict.items(headers):
            merged[name.title()] = value
    return merged


def cached_method(f):
    """Cache the results of a method per instance and per set of arguments

    The results are stored in the instance's `_cached_method__cache`
    attribute. Arguments are normalized through the method's signature, so
    positional, keyword and defaulted forms of the same call share an entry.
    """
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        bound_args = signature.bind(self, *args, **kwargs)
        bound_args.apply_defaults()
        # The first bound argument is `self`, which is not part of the key
        _, *call_values = bound_args.arguments.values()
        key = tuple(call_values)

        instance_cache = vars(self).setdefault('_cached_method__cache', {})
        cache = instance_cache.setdefault(f.__name__, {})
        if key in cache:
            return cache[key]
        cache[key] = f(self, *args, **kwargs)
        return cache[key]
    return wrapper


class classproperty:
    """Descriptor that computes an attribute from the class, optionally cached per class

    Usable as `@classproperty` or with options as `@classproperty(cache=True)`.
    """
    def __new__(cls, func=None, *args, **kwargs):
        if func:
            return super().__new__(cls)
        # Called with options only: return a decorator awaiting the function
        return functools.partial(cls, *args, **kwargs)

    def __init__(self, func, *, cache=False):
        functools.update_wrapper(self, func)
        self.func = func
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        cache = self._cache
        if cache is None:
            return self.func(cls)
        if cls not in cache:
            cache[cls] = self.func(cls)
        return cache[cls]


class function_with_repr:
    """Callable wrapper around func with a custom repr()

    Without a custom repr, "<module>.<qualname>" of the function is used.
    """

    def __init__(self, func, repr_=None):
        functools.update_wrapper(self, func)
        self.func = func
        self.__repr = repr_

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    @classmethod
    def set_repr(cls, repr_):
        """Return a decorator that wraps a function with the given repr"""
        return functools.partial(cls, repr_=repr_)

    def __repr__(self):
        custom_repr = self.__repr
        if custom_repr:
            return custom_repr
        func = self.func
        return f'{func.__module__}.{func.__qualname__}'


class Namespace(types.SimpleNamespace):
    """Simple namespace whose iteration yields the values of its attributes"""

    def __iter__(self):
        return iter(vars(self).values())

    @property
    def items_(self):
        """View of the (attribute name, value) pairs stored in the namespace"""
        return vars(self).items()


# File extensions (without the leading dot) grouped by the kind of media they denote
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
# Append the "common" extensions so that `video`/`audio` list every known extension of that kind
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

# All video, audio and manifest extensions as one flat tuple, in that order
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)


class _UnsafeExtensionError(Exception):
    """
    Mitigation exception for uncommon/malicious file extensions
    This should be caught in YoutubeDL.py alongside a warning

    The offending extension is available as the `extension` attribute

    Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
    """
    # Lowercase extensions that `sanitize_extension` lets through
    ALLOWED_EXTENSIONS = frozenset([
        # internal
        'description',
        'json',
        'meta',
        'orig',
        'part',
        'temp',
        'uncut',
        'unknown_video',
        'ytdl',

        # video
        *MEDIA_EXTENSIONS.video,
        'asx',
        'ismv',
        'm2t',
        'm2ts',
        'm2v',
        'm4s',
        'mng',
        'mp2v',
        'mp4v',
        'mpe',
        'mpeg',
        'mpeg1',
        'mpeg2',
        'mpeg4',
        'mxf',
        'ogm',
        'qt',
        'rm',
        'swf',
        'ts',
        'vob',
        'vp9',

        # audio
        *MEDIA_EXTENSIONS.audio,
        '3ga',
        'ac3',
        'adts',
        'aif',
        'au',
        'dts',
        'isma',
        'it',
        'mid',
        'mod',
        'mpga',
        'mp1',
        'mp2',
        'mp4a',
        'mpa',
        'ra',
        'shn',
        'xm',

        # image
        *MEDIA_EXTENSIONS.thumbnails,
        'avif',
        'bmp',
        'gif',
        'heic',
        'ico',
        'jng',
        'jpeg',
        'jxl',
        'svg',
        'tif',
        'tiff',
        'wbmp',

        # subtitle
        *MEDIA_EXTENSIONS.subtitles,
        'dfxp',
        'fs',
        'ismt',
        'json3',
        'sami',
        'scc',
        'srv1',
        'srv2',
        'srv3',
        'ssa',
        'tt',
        'ttml',
        'xml',

        # others
        *MEDIA_EXTENSIONS.manifests,
        *MEDIA_EXTENSIONS.storyboards,
        'desktop',
        'ism',
        'm3u',
        'sbv',
        'url',
        'webloc',
    ])

    def __init__(self, extension, /):
        message = f'unsafe file extension: {extension!r}'
        super().__init__(message)
        self.extension = extension

    @classmethod
    def sanitize_extension(cls, extension, /, *, prepend=False):
        """
        Validate a file extension and return the extension to use

        @param extension: Extension to check; `None` is passed through unchanged
        @param prepend:   If true, only path separators are rejected and the
                          allow-list is not consulted
        @returns          The extension, with a final `bin` component replaced
                          by `unknown_video`
        @raises           `cls` if the extension contains a path separator or its
                          last dot-separated component is not allowed
        """
        if extension is None:
            return None

        # Never let an extension introduce a path component
        if any(separator in extension for separator in ('/', '\\')):
            raise cls(extension)

        if prepend:
            return extension

        # Only the part after the last dot decides whether the extension is acceptable
        suffix = extension.rpartition('.')[2]
        if suffix == 'bin':
            extension = suffix = 'unknown_video'
        if suffix.lower() in cls.ALLOWED_EXTENSIONS:
            return extension
        raise cls(extension)


class RetryManager:
    """
    Iterable that drives a retry loop

    Every iteration yields the manager itself. The loop body signals a failed
    attempt by assigning `retry.error`; the loop stops once an attempt finishes
    without setting an error, or when `attempt` exceeds `retries`. After each
    failed attempt the error callback is invoked with (error, attempt, retries)

    Usage:
        for retry in RetryManager(...):
            try:
                ...
            except SomeException as err:
                retry.error = err
                continue
    """
    attempt = 0
    _error = None

    def __init__(self, _retries, _error_callback, **kwargs):
        self.retries = _retries or 0
        # Extra keyword arguments are pre-bound to the callback
        self.error_callback = functools.partial(_error_callback, **kwargs)

    def _should_retry(self):
        # NO_DEFAULT marks an attempt that completed without reporting an error
        if self._error is NO_DEFAULT:
            return False
        return self.attempt <= self.retries

    @property
    def error(self):
        return None if self._error is NO_DEFAULT else self._error

    @error.setter
    def error(self, value):
        self._error = value

    def __iter__(self):
        while self._should_retry():
            self.error = NO_DEFAULT
            self.attempt += 1
            yield self
            failure = self.error
            if failure:
                self.error_callback(failure, self.attempt, self.retries)

    @staticmethod
    def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
        """Utility function for reporting retries"""
        if count > retries:
            # Out of retries: report through `error` when given, else re-raise the exception
            if not error:
                raise e
            if count > 1:
                return error(f'{e}. Giving up after {count - 1} retries')
            return error(str(e))

        if not count:
            return warn(e)
        if isinstance(e, ExtractorError):
            e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
        warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')

        # `sleep_func` is either a callable taking the retry index as `n`, or the delay itself
        if callable(sleep_func):
            delay = float_or_none(sleep_func(n=count - 1))
        else:
            delay = sleep_func
        if delay:
            info(f'Sleeping {delay:.2f} seconds ...')
            time.sleep(delay)


def make_archive_id(ie, video_id):
    """
    Build the archive entry "<lowercased extractor key> <video id>"

    @param ie:       Extractor key as a string, or an object providing `ie_key()`
    @param video_id: ID of the video
    """
    if not isinstance(ie, str):
        ie = ie.ie_key()
    return f'{ie.lower()} {video_id}'


def truncate_string(s, left, right=0):
    """
    Shorten a string to `left + right` characters by replacing its middle with "..."

    @param s:     String to shorten; `None` and short enough strings are returned as-is
    @param left:  Length of the leading part including the 3 characters of "..." (must be > 3)
    @param right: Number of trailing characters to keep (must be >= 0)
    """
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        return s
    head = s[:left - 3]
    # `s[-0:]` would be the whole string, so no tail is sliced when right == 0
    tail = s[-right:] if right else ''
    return f'{head}...{tail}'


def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
    """
    Resolve a list of option values into an ordered, de-duplicated selection

    @param options:    Values to process in order. A leading "-" removes the value
                       from the selection instead of adding it
    @param alias_dict: Maps alias names to lists of values; must contain "all",
                       which lists every valid value
    @param use_regex:  Treat non-alias values as case-insensitive regexes that are
                       full-matched against the values in alias_dict["all"]
    @param start:      Initial selection
    @raises            ValueError if a value is neither an alias nor a valid value
    """
    assert 'all' in alias_dict, '"all" alias is required'
    requested = list(start or [])
    for val in options:
        discard = val.startswith('-')
        if discard:
            val = val[1:]

        if val in alias_dict:
            expansion = alias_dict[val]
            if discard:
                # Discarding an alias inverts the sign of every member of the alias
                expansion = [i[1:] if i.startswith('-') else f'-{i}' for i in expansion]
            # NB: Do not allow regex in aliases for performance
            requested = orderedSet_from_options(expansion, alias_dict, start=requested)
            continue

        if use_regex:
            matched = filter(re.compile(val, re.I).fullmatch, alias_dict['all'])
        elif val in alias_dict['all']:
            matched = [val]
        else:
            raise ValueError(val)

        if not discard:
            requested.extend(matched)
            continue
        for item in matched:
            # Remove every occurrence, not just the first one
            while item in requested:
                requested.remove(item)

    return orderedSet(requested)


# TODO: Rewrite
class FormatSorter:
    """
    Computes sort keys for format dicts from a list of sort field specifiers

    `evaluate_params` assembles the effective field order (stored in `_order`)
    and `calculate_preference` turns one format dict into a tuple with one entry
    per field of that order.

    NOTE(review): `settings` is a class attribute and `_get_field_setting`,
    `_resolve_field_value` and `evaluate_params` mutate it in place, so cached
    defaults and limits look to be shared between instances - confirm this is
    intended before relying on per-instance isolation
    """
    # One sort item: optional "+" (reverse), a field name and an optional
    # ":" or "~" separator followed by a limit. An empty item is also accepted
    regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'

    # Fields that are always appended after the user's and extractor's fields (see evaluate_params)
    default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
               'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
               'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')  # These must not be aliases
    # Alternative field order; it is not referenced by any method of this class
    ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
                    'height', 'width', 'proto', 'vext', 'abr', 'aext',
                    'fps', 'fs_approx', 'source', 'id')

    # Per-field configuration. Keys read by the methods below:
    #   type:         'field' (default), 'ordered', 'boolean', 'extractor', 'multiple', 'combined' or 'alias'
    #   field:        key(s) of the format dict to read, or the target field of an alias
    #   convert:      how a limit/value is converted (see _resolve_field_value)
    #   order, order_free, regex: ranking list of an 'ordered' field; `order_free` is used
    #                 instead of `order` when the 'prefer_free_formats' param is set
    #   forced, priority: fields that evaluate_params puts in front of the user's fields
    #   visible:      whether print_verbose_info lists the field
    #   default:      passed as `default` to float_or_none when computing the numeric value
    #   max:          upper bound for 'extractor' fields; values at or above it are replaced by -1
    #   in_list, not_in_list: membership tests for 'boolean' fields
    #   function:     combines the values of a 'multiple' field into a single value
    #   deprecated:   marks an alias whose use triggers a deprecation warning
    settings = {
        'vcodec': {'type': 'ordered', 'regex': True,
                   'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
        'acodec': {'type': 'ordered', 'regex': True,
                   'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
        'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
                'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
        'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
                  'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
        'vext': {'type': 'ordered', 'field': 'video_ext',
                 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
                 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
        'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
                 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
                 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
        'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
        'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
                       'field': ('vcodec', 'acodec'),
                       'function': lambda it: int(any(v != 'none' for v in it))},
        'ie_pref': {'priority': True, 'type': 'extractor'},
        'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
        'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
        'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
        'quality': {'convert': 'float', 'default': -1},
        'filesize': {'convert': 'bytes'},
        'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
        'id': {'convert': 'string', 'field': 'format_id'},
        'height': {'convert': 'float_none'},
        'width': {'convert': 'float_none'},
        'fps': {'convert': 'float_none'},
        'channels': {'convert': 'float_none', 'field': 'audio_channels'},
        'tbr': {'convert': 'float_none'},
        'vbr': {'convert': 'float_none'},
        'abr': {'convert': 'float_none'},
        'asr': {'convert': 'float_none'},
        'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},

        'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
        'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
               'function': lambda it: next(filter(None, it), None)},
        'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
                 'function': lambda it: next(filter(None, it), None)},
        'ext': {'type': 'combined', 'field': ('vext', 'aext')},
        'res': {'type': 'multiple', 'field': ('height', 'width'),
                'function': lambda it: min(filter(None, it), default=0)},

        # Actual field names
        'format_id': {'type': 'alias', 'field': 'id'},
        'preference': {'type': 'alias', 'field': 'ie_pref'},
        'language_preference': {'type': 'alias', 'field': 'lang'},
        'source_preference': {'type': 'alias', 'field': 'source'},
        'protocol': {'type': 'alias', 'field': 'proto'},
        'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
        'audio_channels': {'type': 'alias', 'field': 'channels'},

        # Deprecated
        'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
        'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
        'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
        'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
        'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
        'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
        'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
        'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
        'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
        'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
        'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
        'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
        'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
        'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
        'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
        'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
        'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
        'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
        'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
        'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
    }

    def __init__(self, ydl, field_preference):
        """
        @param ydl:              Object providing `params`, `write_debug` and `deprecated_feature`
        @param field_preference: Iterable of sort strings; evaluate_params treats them
                                 as the extractor's sort order
        """
        self.ydl = ydl
        self._order = []
        self.evaluate_params(self.ydl.params, field_preference)
        if ydl.params.get('verbose'):
            self.print_verbose_info(self.ydl.write_debug)

    def _get_field_setting(self, field, key):
        """
        Return the setting `key` of `field`, computing and caching a default if it is missing

        Unknown fields yield False for 'forced'/'priority'. For any other key they
        trigger a deprecation warning and are registered with empty settings
        """
        if field not in self.settings:
            if key in ('forced', 'priority'):
                return False
            self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
                                        'deprecated and may be removed in a future version')
            self.settings[field] = {}
        prop_obj = self.settings[field]
        if key not in prop_obj:
            type_ = prop_obj.get('type')
            if key == 'field':
                default = 'preference' if type_ == 'extractor' else (field,) if type_ in ('combined', 'multiple') else field
            elif key == 'convert':
                default = 'order' if type_ == 'ordered' else 'float_string' if field else 'ignore'
            else:
                default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key)
            # Cache the default so that later lookups take the fast path
            prop_obj[key] = default
        return prop_obj[key]

    def _resolve_field_value(self, field, value, convert_none=False):
        """
        Convert a textual value of `field` according to the field's 'convert' setting

        @param value:        String to convert, or None
        @param convert_none: If false, None is returned as None without conversion
        """
        if value is None:
            if not convert_none:
                return None
        else:
            value = value.lower()
        conversion = self._get_field_setting(field, 'convert')
        if conversion == 'ignore':
            return None
        if conversion == 'string':
            return value
        elif conversion == 'float_none':
            return float_or_none(value)
        elif conversion == 'bytes':
            return parse_bytes(value)
        elif conversion == 'order':
            # Rank = distance from the end of the order list, so earlier entries rank higher
            order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
            use_regex = self._get_field_setting(field, 'regex')
            list_length = len(order_list)
            # Values that are not listed get the position of the '' entry
            empty_pos = order_list.index('') if '' in order_list else list_length + 1
            if use_regex and value is not None:
                for i, regex in enumerate(order_list):
                    # Falsy entries ('' and None) are never used as patterns
                    if regex and re.match(regex, value):
                        return list_length - i
                return list_length - empty_pos  # not in list
            else:  # not regex or value = None
                return list_length - (order_list.index(value) if value in order_list else empty_pos)
        else:
            # Any other conversion (e.g. 'float', 'float_string'): numeric strings become
            # floats; otherwise the field is switched to 'string' conversion from now on
            if value.isnumeric():
                return float(value)
            else:
                self.settings[field]['convert'] = 'string'
                return value

    def evaluate_params(self, params, sort_extractor):
        """
        Build the field order (`_order`) and store each field's reverse/closest/limit settings

        Items are processed in this order: forced fields, priority fields (unless
        the 'format_sort_force' param is set), the 'format_sort' param, `sort_extractor`
        and finally `default`. Only the first occurrence of a field takes effect

        @raises ExtractorError if an item does not match `regex`
        """
        self._use_free_order = params.get('prefer_free_formats', False)
        self._sort_user = params.get('format_sort', [])
        self._sort_extractor = sort_extractor

        def add_item(field, reverse, closest, limit_text):
            field = field.lower()
            # The first occurrence wins; later duplicates are ignored
            if field in self._order:
                return
            self._order.append(field)
            limit = self._resolve_field_value(field, limit_text)
            data = {
                'reverse': reverse,
                'closest': False if limit is None else closest,
                'limit_text': limit_text,
                'limit': limit}
            if field in self.settings:
                self.settings[field].update(data)
            else:
                self.settings[field] = data

        sort_list = (
            tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
            + (tuple() if params.get('format_sort_force', False)
                else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
            + tuple(self._sort_user) + tuple(sort_extractor) + self.default)

        for item in sort_list:
            match = re.match(self.regex, item)
            if match is None:
                raise ExtractorError(f'Invalid format sort string "{item}" given by extractor')
            field = match.group('field')
            # The regex also matches empty items, which have no field
            if field is None:
                continue
            if self._get_field_setting(field, 'type') == 'alias':
                alias, field = field, self._get_field_setting(field, 'field')
                if self._get_field_setting(alias, 'deprecated'):
                    self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
                                                f'be removed in a future version. Please use {field} instead')
            reverse = match.group('reverse') is not None
            closest = match.group('separator') == '~'
            limit_text = match.group('limit')

            has_limit = limit_text is not None
            has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
            has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')

            # A 'combined' field expands to its member fields; its limit may hold
            # one ":"-separated limit per member
            fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
            limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
            limit_count = len(limits)
            for (i, f) in enumerate(fields):
                add_item(f, reverse, closest,
                         limits[i] if i < limit_count
                         else limits[0] if has_limit and not has_multiple_limits
                         else None)

    def print_verbose_info(self, write_debug):
        """Report the user's, the extractor's and the effective sort order through `write_debug`"""
        if self._sort_user:
            write_debug('Sort order given by user: {}'.format(', '.join(self._sort_user)))
        if self._sort_extractor:
            write_debug('Sort order given by extractor: {}'.format(', '.join(self._sort_extractor)))
        # Each visible field is printed as "[+]field[(:|~)limit_text(limit)]"
        write_debug('Formats sorted by: {}'.format(', '.join(['{}{}{}'.format(
            '+' if self._get_field_setting(field, 'reverse') else '', field,
            '{}{}({})'.format('~' if self._get_field_setting(field, 'closest') else ':',
                              self._get_field_setting(field, 'limit_text'),
                              self._get_field_setting(field, 'limit'))
            if self._get_field_setting(field, 'limit_text') is not None else '')
            for field in self._order if self._get_field_setting(field, 'visible')])))

    def _calculate_field_preference_from_value(self, format_, field, type_, value):
        """
        Turn the raw `value` of `field` into a tuple usable as part of a sort key

        The value is first normalized according to `type_` ('extractor', 'boolean'
        or 'ordered'; other types are used as-is), then converted to a number where
        possible and finally ranked against the field's limit/reverse/closest settings.
        `format_` is not used by this method
        """
        reverse = self._get_field_setting(field, 'reverse')
        closest = self._get_field_setting(field, 'closest')
        limit = self._get_field_setting(field, 'limit')

        if type_ == 'extractor':
            maximum = self._get_field_setting(field, 'max')
            if value is None or (maximum is not None and value >= maximum):
                value = -1
        elif type_ == 'boolean':
            in_list = self._get_field_setting(field, 'in_list')
            not_in_list = self._get_field_setting(field, 'not_in_list')
            value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
        elif type_ == 'ordered':
            value = self._resolve_field_value(field, value, True)

        # try to convert to number
        val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
        is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
        if is_num:
            value = val_num

        # The first element separates the cases: -10 for missing values, 1 for strings,
        # 0 for numbers ranked normally and -1 for the remaining case (see the last line)
        return ((-10, 0) if value is None
                else (1, value, 0) if not is_num  # if a field has mixed strings and numbers, strings are sorted higher
                else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
                else (0, value, 0) if not reverse and (limit is None or value <= limit)
                else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
                else (-1, value, 0))

    def _calculate_field_preference(self, format_, field):
        """Read the value(s) of `field` from the format dict and compute the field's sort tuple"""
        type_ = self._get_field_setting(field, 'type')  # extractor, boolean, ordered, field, multiple
        get_value = lambda f: format_.get(self._get_field_setting(f, 'field'))
        if type_ == 'multiple':
            type_ = 'field'  # Only 'field' is allowed in multiple for now
            actual_fields = self._get_field_setting(field, 'field')

            # The field's 'function' reduces the values of all member fields to one value
            value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
        else:
            value = get_value(field)
        return self._calculate_field_preference_from_value(format_, field, type_, value)

    def calculate_preference(self, format):
        """
        Return the sort key of a format dict: a tuple with one entry per field in `_order`

        NOTE: The format dict is modified in place; missing 'protocol', 'ext',
        'video_ext', 'audio_ext', 'preference' and bitrate fields are filled in first
        """
        # Determine missing protocol
        if not format.get('protocol'):
            format['protocol'] = determine_protocol(format)

        # Determine missing ext
        if not format.get('ext') and 'url' in format:
            format['ext'] = determine_ext(format['url'])
        if format.get('vcodec') == 'none':
            format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
            format['video_ext'] = 'none'
        else:
            format['video_ext'] = format['ext']
            format['audio_ext'] = 'none'
        # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'):  # Not supported?
        #    format['preference'] = -1000

        if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''):
            # HEVC-over-FLV is out-of-spec by FLV's original spec
            # ref. https://trac.ffmpeg.org/ticket/6389
            # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
            format['preference'] = -100

        # Determine missing bitrates
        if format.get('vcodec') == 'none':
            format['vbr'] = 0
        if format.get('acodec') == 'none':
            format['abr'] = 0
        # Derive each missing bitrate from the other two (tbr = vbr + abr); a result of 0 is stored as None
        # NOTE(review): try_call presumably returns None when the lambda raises - confirm
        if not format.get('vbr') and format.get('vcodec') != 'none':
            format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
        if not format.get('abr') and format.get('acodec') != 'none':
            format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
        if not format.get('tbr'):
            format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None

        return tuple(self._calculate_field_preference(format, field) for field in self._order)


def filesize_from_tbr(tbr, duration):
    """
    Estimate a file size from a bitrate and a duration

    @param tbr:      Total bitrate in kbps (1000 bits/sec)
    @param duration: Duration in seconds
    @returns         Filesize in bytes, or None if either argument is None
    """
    if duration is None or tbr is None:
        return None
    # 1 kbit = 1000 bits = 125 bytes
    bytes_per_kbit = 1000 / 8
    return int(duration * tbr * bytes_per_kbit)


# XXX: Temporary
class _YDLLogger:
    """
    Logger that forwards messages to the output methods of a YoutubeDL-like object

    Every method is a no-op when no (or a falsy) `ydl` was given
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        if not self._ydl:
            return
        self._ydl.to_screen(message)

    def warning(self, message, *, once=False):
        if not self._ydl:
            return
        self._ydl.report_warning(message, once)

    def error(self, message, *, is_error=True):
        if not self._ydl:
            return
        self._ydl.report_error(message, is_error=is_error)

    def stdout(self, message):
        if not self._ydl:
            return
        self._ydl.to_stdout(message)

    def stderr(self, message):
        if not self._ydl:
            return
        self._ydl.to_stderr(message)
-												[letv] Fix extraction

Using data URIs for passing the decrypted M3U8 manifest, which is
supported by ffmpeg only.

											
										
										
											2015-07-22 12:03:05 +00:00
+								import base64
-												[utils] Add OHDave's RSA encryption function

											
										
										
											2016-02-16 22:01:44 +00:00
+								import binascii
-												[instagram] Fix info_dict key name

											
										
										
											2014-03-24 00:40:09 +00:00
+								import calendar
-												Fix unicode_escape (Fixes #2695)

											
										
										
											2014-04-04 21:00:51 +00:00
+								import codecs
-												[utils] Improve cookie files support

+ Add support for UTF-8 in cookie files
* Skip malformed cookie file entries instead of crashing (invalid entry len, invalid expires at)

											
										
										
											2020-05-04 21:19:33 +00:00
+								import collections
-												[utils] `traverse_obj`: Rewrite, document and add tests (#5024)

Authored by: Grub4K
											
										
										
											2022-09-25 21:03:19 +00:00
+								import collections.abc
-												Ignore BOM in batch files (Fixes #2450)

											
										
										
											2014-02-25 00:43:17 +00:00
+								import contextlib
-												[cleanup] Standardize `import datetime as dt` (#8978)

											
										
										
											2024-02-25 00:16:34 +00:00
+								import datetime as dt
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
+								import email.header
-												[cleanup] Sort imports

Using https://github.com/PyCQA/isort

    isort -m VERTICAL_HANGING_INDENT --py 36 -l 80 --rr -n --tc .

											
										
										
											2022-04-11 22:32:57 +00:00
+								import email.utils
-												Do not re-encode / to # if / is a platform separator, and correctly handle permission errors (Fixes #831)

											
										
										
											2013-05-13 07:20:08 +00:00
+								import errno
-												[atv.at] Use jwt for API (#1012)

The jwt token is implemented according to RFC7519

Closes #988
Authored by: NeroBurner
											
										
										
											2021-09-23 17:40:51 +00:00
+								import hashlib
 								import hmac
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
+								import html.entities
 								import html.parser
-												[update] Ability to set a maximum version for specific variants

											
										
										
											2022-06-29 01:13:24 +00:00
+								import inspect
-												Use io.BytesIO instead of StringIO

											
										
										
											2012-11-27 23:09:17 +00:00
+								import io
-												Instead of replacing accented characters with an underscore when sanitizing file names in restricted mode, replace them with their non-accented equivalents fixes #9347

											
										
										
											2016-05-02 03:21:39 +00:00
+								import itertools
-												Correct JSON writing (Closes #596)

											
										
										
											2012-12-20 12:13:24 +00:00
+								import json
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
+								import locale
-												[zdf/common] Use API in ZDF extractor.

This also comes with a lot of extra format fields
Fixes #1518

											
										
										
											2013-11-25 02:12:26 +00:00
+								import math
-												[cleanup] Sort imports

Using https://github.com/PyCQA/isort

    isort -m VERTICAL_HANGING_INDENT --py 36 -l 80 --rr -n --tc .

											
										
										
											2022-04-11 22:32:57 +00:00
+								import mimetypes
-												Add option `--netrc-cmd` (#6682)

Authored by: NDagestad, pukkandan
Closes #1706
											
										
										
											2023-06-21 03:07:42 +00:00
+								import netrc
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								import operator
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
+								import os
-												Fix platform name in Python 2 with --verbose (Closes #1228)

											
										
										
											2013-08-28 10:57:10 +00:00
+								import platform
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								import random
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
+								import re
-												[cleanup] Sort imports

Using https://github.com/PyCQA/isort

    isort -m VERTICAL_HANGING_INDENT --py 36 -l 80 --rr -n --tc .

											
										
										
											2022-04-11 22:32:57 +00:00
+								import shlex
-												Fix platform name in Python 2 with --verbose (Closes #1228)

											
										
										
											2013-08-28 10:57:10 +00:00
+								import socket
-												Instead of replacing accented characters with an underscore when sanitizing file names in restricted mode, replace them with their non-accented equivalents fixes #9347

											
										
										
											2016-05-02 03:21:39 +00:00
+								import ssl
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
+								import struct
-												Improve --bidi-workaround support

											
										
										
											2013-12-09 17:29:07 +00:00
+								import subprocess
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
+								import sys
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											2014-08-21 11:01:13 +00:00
+								import tempfile
-												[utils] Improve cookie files support

+ Add support for UTF-8 in cookie files
* Skip malformed cookie file entries instead of crashing (invalid entry len, invalid expires at)

											
										
										
											2020-05-04 21:19:33 +00:00
+								import time
-												Make ExtractorError usable for other causes

											
										
										
											2013-01-03 14:39:55 +00:00
+								import traceback
-												[cleanup] Misc fixes (see desc)

* [tvver] Fix bug in 6837633a4a614920b6e43ffc6b4b8590dca8c9d7 - Closes #4054
* [rumble] Fix tests - Closes #3976
* [make] Remove `cat` abuse - Closes #3989
* [make] Revert #3684 - Closes #3814
* [utils] Improve `get_elements_by_class` - Closes #3993
* [utils] Inherit `Namespace` from `types.SimpleNamespace`
* [utils] Use `re.fullmatch` for matching filters
* [jsinterp] Handle quotes in `_separate`
* [make_readme] Allow overshooting last line

Authored by: pukkandan, kwconder, MrRawes, Lesmiscore

											
										
										
											2022-05-25 12:23:46 +00:00
+								import types
-												[outtmpl] Smarter replacing of unsupported characters

Closes #1330

											
										
										
											2022-08-04 14:49:32 +00:00
+								import unicodedata
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								import urllib.error
-												[cleanup] Sort imports

Using https://github.com/PyCQA/isort

    isort -m VERTICAL_HANGING_INDENT --py 36 -l 80 --rr -n --tc .

											
										
										
											2022-04-11 22:32:57 +00:00
+								import urllib.parse
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
+								import urllib.request
-												[generic] Suppress warning about doctypes in RSS parser

											
										
										
											2014-03-10 16:31:32 +00:00
+								import xml.etree.ElementTree
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												[cleanup, utils] Split into submodules (#7090)

Closes https://github.com/yt-dlp/yt-dlp/pull/2173

Authored by: pukkandan, coletdjnz
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
											
										
										
											2023-05-20 21:56:23 +00:00
+								from . import traversal
 								from ..compat import functools  # isort: split
 								from ..compat import (
-												Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178)

Attributes aren't unicode objects, so they couldn't be directly used in info_dict fields (for example '--write-description' doesn't work with bytes).

											
										
										
											2015-10-25 19:04:55 +00:00
+								    compat_etree_fromstring,
-												[utils] Introduce expand_path

											
										
										
											2017-03-25 19:30:10 +00:00
+								    compat_expanduser,
-												[cleanup] Sort imports

Using https://github.com/PyCQA/isort

    isort -m VERTICAL_HANGING_INDENT --py 36 -l 80 --rr -n --tc .

											
										
										
											2022-04-11 22:32:57 +00:00
+								    compat_HTMLParseError,
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
+								    compat_os_name,
-												[util] Move compatibility functions out of util

utils is large enough without these compatibility functions.

Everything that is present in newer versions of Python (i.e. with dev Python it's just an import) goes into compat.py .
Everything else (i.e. youtube-dl-specific helpers) goes into utils.py .

											
										
										
											2014-11-02 10:23:40 +00:00
+								)
-												[rh:websockets] Migrate websockets to networking framework (#7720)

* Adds a basic WebSocket framework
* Introduces new minimum `websockets` version of 12.0
* Deprecates `WebSocketsWrapper`

Fixes https://github.com/yt-dlp/yt-dlp/issues/8439

Authored by: coletdjnz
											
										
										
											2023-11-20 08:04:04 +00:00
+								from ..dependencies import xattr
-												[utils] Register SOCKS protocols in urllib and support SOCKS4A

											
										
										
											2016-05-03 07:15:32 +00:00
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								__name__ = __name__.rsplit('.', 1)[0]  # noqa: A001: Pretend to be the parent module
-												[devscripts/cli_to_api] Add script

											
										
										
											2023-05-24 17:59:30 +00:00
-												implement fallbacks and defaults in _search_regex

											
										
										
											2013-06-06 12:35:08 +00:00
+								# This is not clearly defined otherwise
 								compiled_regex_type = type(re.compile(''))
-												[utils] Introduce random_user_agent and use as default User-Agent (closes #21546)

											
										
										
											2019-06-28 17:32:43 +00:00
-												Update to ytdl-commit-d1c6c5

[YouTube] [core] Improve platform debug log, based on yt-dlp
https://github.com/ytdl-org/youtube-dl/commit/d1c6c5c4d618fa950813c0c71aede34a5ac851e9

Except:
    * 6ed34338285f722d0da312ce0af3a15a077a3e2a [jsinterp] Add short-cut evaluation for common expression
        * There was no performance improvement when tested with https://github.com/ytdl-org/youtube-dl/issues/30641
    * e8de54bce50f6f77a4d7e8e80675f7003d5bf630 [core] Handle `/../` sequences in HTTP URLs
        * We plan to implement this differently

											
										
										
											2023-05-24 18:00:43 +00:00
+								class NO_DEFAULT:
 								    pass
 								def IDENTITY(x):
 								    return x
-												[utils] Add default value for xpath_text

											
										
										
											2015-06-28 16:56:07 +00:00
-												[Yam] Add new extractor

											
										
										
											2015-02-13 07:14:23 +00:00
+								ENGLISH_MONTH_NAMES = [
 								    'January', 'February', 'March', 'April', 'May', 'June',
 								    'July', 'August', 'September', 'October', 'November', 'December']
-												[utils] Improve month_by_name and add tests

											
										
										
											2016-09-14 16:13:55 +00:00
+								MONTH_NAMES = {
 								    'en': ENGLISH_MONTH_NAMES,
 								    'fr': [
-												[utils] Use native french month names

											
										
										
											2016-09-14 16:57:01 +00:00
+								        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
 								        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
-												[extractor/agora] Add extractors (#5101)

Authored by: selfisekai
											
										
										
											2022-11-04 14:54:05 +00:00
+								    # these follow the genitive grammatical case (dopełniacz)
 								    # some websites might be using nominative, which will require another month list
 								    # https://en.wikibooks.org/wiki/Polish/Noun_cases
 								    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
 								           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
-												[utils] Improve month_by_name and add tests

											
										
										
											2016-09-14 16:13:55 +00:00
+								}
-												[utils,franceinter] Add french months' names and fix extraction

Update of the "FranceInter" radio extractor : webpages HTML structure
had changed, the extractor didn't work. So I updated this extractor to
get the mp3 URL and all details.

											
										
										
											2016-09-02 16:31:52 +00:00
-												[jsinterp] Handle new youtube signature functions

Closes #4635

											
										
										
											2022-08-13 23:21:54 +00:00
+								# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
 								TIMEZONE_NAMES = {
 								    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
 								    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
 								    'EST': -5, 'EDT': -4,  # Eastern
 								    'CST': -6, 'CDT': -5,  # Central
 								    'MST': -7, 'MDT': -6,  # Mountain
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    'PST': -8, 'PDT': -7,   # Pacific
-												[jsinterp] Handle new youtube signature functions

Closes #4635

											
										
										
											2022-08-13 23:21:54 +00:00
+								}
-												improved performance by extracting accented chars to top level

											
										
										
											2016-05-03 00:40:30 +00:00
+								# needed for sanitizing filenames in restricted mode
-												Added sanitization support for Hungarian letters Ő and Ű
											
										
										
											2016-06-02 09:51:48 +00:00
+								ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
-												[utils] Transliterate "þ" as "th" (#20897)

Despite visual similarity "þ" is unrelated to "p".
It is normally transliterated as "th":

    $ echo þ-Þ | iconv -t ASCII//TRANSLIT
    th-TH
											
										
										
											2019-05-10 18:42:32 +00:00
+								                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
 								                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
-												improved performance by extracting accented chars to top level

											
										
										
											2016-05-03 00:40:30 +00:00
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								DATE_FORMATS = (
 								    '%d %B %Y',
 								    '%d %b %Y',
 								    '%B %d %Y',
-												[utils] Add more date formats

											
										
										
											2017-01-12 15:39:45 +00:00
+								    '%B %dst %Y',
 								    '%B %dnd %Y',
-												[utils] Handle rd-suffixed day parts in unified_strdate (#23199)


											
										
										
											2019-11-26 17:08:37 +00:00
+								    '%B %drd %Y',
-												[utils] Add more date formats

											
										
										
											2017-01-12 15:39:45 +00:00
+								    '%B %dth %Y',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								    '%b %d %Y',
-												[utils] Add more date formats

											
										
										
											2017-01-12 15:39:45 +00:00
+								    '%b %dst %Y',
 								    '%b %dnd %Y',
-												[utils] Handle rd-suffixed day parts in unified_strdate (#23199)


											
										
										
											2019-11-26 17:08:37 +00:00
+								    '%b %drd %Y',
-												[utils] Add more date formats

											
										
										
											2017-01-12 15:39:45 +00:00
+								    '%b %dth %Y',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								    '%b %dst %Y %I:%M',
 								    '%b %dnd %Y %I:%M',
-												[utils] Handle rd-suffixed day parts in unified_strdate (#23199)


											
										
										
											2019-11-26 17:08:37 +00:00
+								    '%b %drd %Y %I:%M',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								    '%b %dth %Y %I:%M',
 								    '%Y %m %d',
 								    '%Y-%m-%d',
-												[Mediaklikk] Add Extractor (#867)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/17453, https://github.com/ytdl-org/youtube-dl/pull/25098
Fixes: https://github.com/ytdl-org/youtube-dl/issues/21431
Authored-by: tmarki, mrx23dot, coletdjnz
											
										
										
											2021-09-06 06:52:38 +00:00
+								    '%Y.%m.%d.',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								    '%Y/%m/%d',
-												[utils] Recognize more formats in unified_timestamp

Used in CtsNews

											
										
										
											2016-08-10 03:36:49 +00:00
+								    '%Y/%m/%d %H:%M',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								    '%Y/%m/%d %H:%M:%S',
-												[radiko] Add extractors (#731)

https://github.com/ytdl-org/youtube-dl/issues/29840
Authored by: nao20010128nao
											
										
										
											2021-08-25 04:48:27 +00:00
+								    '%Y%m%d%H%M',
 								    '%Y%m%d%H%M%S',
-												[utils] Fix parsing `YYYYMMDD` dates in Nov/Dec (#2094)

The date format `%Y%m%d%H%M` will successfully match against
one-digit month, day, hour, and minute strings, even though %m et al.
are documented as being zero-padded. So dates without time in
Nov/Dec may be wrongly parsed as dates in January with time.

This commit adds a format string of `%Y%m%d` to our supported date
format strings directly below (higher priority) its problematic relatives.

Closes #2076
Authored by: wlritchi
											
										
										
											2021-12-23 20:34:01 +00:00
+								    '%Y%m%d',
-												[utils] Add another date format seen in NextTV

											
										
										
											2017-01-23 15:31:43 +00:00
+								    '%Y-%m-%d %H:%M',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								    '%Y-%m-%d %H:%M:%S',
 								    '%Y-%m-%d %H:%M:%S.%f',
-												[parlview] Add extractor (#322)

Authored by: king-millez
											
										
										
											2021-05-20 13:05:37 +00:00
+								    '%Y-%m-%d %H:%M:%S:%f',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								    '%d.%m.%Y %H:%M',
 								    '%d.%m.%Y %H.%M',
 								    '%Y-%m-%dT%H:%M:%SZ',
 								    '%Y-%m-%dT%H:%M:%S.%fZ',
 								    '%Y-%m-%dT%H:%M:%S.%f0Z',
 								    '%Y-%m-%dT%H:%M:%S',
 								    '%Y-%m-%dT%H:%M:%S.%f',
 								    '%Y-%m-%dT%H:%M',
-												[utils] Lower priority for rare date formats and add tests

											
										
										
											2016-09-29 16:47:25 +00:00
+								    '%b %d %Y at %H:%M',
 								    '%b %d %Y at %H:%M:%S',
-												[utils] Add another date format pattern (#14999)

											
										
										
											2017-12-16 14:56:16 +00:00
+								    '%B %d %Y at %H:%M',
 								    '%B %d %Y at %H:%M:%S',
-												[CGTN] Add extractor (#981)

Authored by: chao813
											
										
										
											2021-09-19 12:18:22 +00:00
+								    '%H:%M %d-%b-%Y',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								)
 								DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
 								DATE_FORMATS_DAY_FIRST.extend([
 								    '%d-%m-%Y',
 								    '%d.%m.%Y',
 								    '%d.%m.%y',
 								    '%d/%m/%Y',
 								    '%d/%m/%y',
 								    '%d/%m/%Y %H:%M:%S',
-												[extractor/rai] Add raisudtirol extractor (#4524)

Closes #4206
Authored by: nixxo
											
										
										
											2022-08-01 19:25:48 +00:00
+								    '%d-%m-%Y %H:%M',
-												[extractor/camfm] Add extractors (#7083)

Authored by: garret1317
											
										
										
											2023-05-29 14:44:26 +00:00
+								    '%H:%M %d/%m/%Y',
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
+								])
 								DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
 								DATE_FORMATS_MONTH_FIRST.extend([
 								    '%m-%d-%Y',
 								    '%m.%d.%Y',
 								    '%m/%d/%Y',
 								    '%m/%d/%y',
 								    '%m/%d/%Y %H:%M:%S',
 								])
-												[utils] Expose PACKED_CODES_RE

											
										
										
											2016-10-19 16:28:49 +00:00
+								PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
-												[extractor] Improve json+ld extraction

Related #5035

											
										
										
											2022-09-26 21:00:50 +00:00
+								JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'
-												[utils] Expose PACKED_CODES_RE

											
										
										
											2016-10-19 16:28:49 +00:00
-												[cleanup] Misc fixes

Closes #3565, https://github.com/yt-dlp/yt-dlp/issues/3514#issuecomment-1105944364

											
										
										
											2022-04-29 01:48:36 +00:00
+								NUMBER_RE = r'\d+(?:\.\d+)?'
-												[Yam] Add new extractor

											
										
										
											2015-02-13 07:14:23 +00:00
-												[utils] Improve performance using `functools.cache`

Closes #3786

											
										
										
											2022-05-19 14:06:31 +00:00
+								@functools.cache
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
+								def preferredencoding():
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    """Get preferred encoding.
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    Returns the best encoding scheme for the system, based on
 								    locale.getpreferredencoding() and some further tweaks.
 								    """
 								    try:
 								        pref = locale.getpreferredencoding()
-												[utils] Remove stray u'

											
										
										
											2014-11-17 06:16:12 +00:00
+								        'TEST'.encode(pref)
-												Don't use bare 'except:'

They catch any exception, including KeyboardInterrupt, we don't want to catch it.

											
										
										
											2015-03-27 12:02:20 +00:00
+								    except Exception:
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								        pref = 'UTF-8'
-												Simplified preferredencoding()

Not sure what is the point to use yield to return encoding, thus
it will simplify the whole function.

Signed-off-by: Arvydas Sidorenko <asido4@gmail.com>

											
										
										
											2012-07-01 16:21:27 +00:00
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    return pref
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												Correct JSON writing (Closes #596)

											
										
										
											2012-12-20 12:13:24 +00:00
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											2014-08-21 11:01:13 +00:00
+								def write_json_file(obj, fn):
-												Fix "ERROR: Cannot write metadata to JSON file" on Windows

Fixes #4246

											
										
										
											2014-11-20 06:05:39 +00:00
+								    """ Encode obj as JSON and write it to fn, atomically if possible """
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											2014-08-21 11:01:13 +00:00
-												[cleanup] Remove unused code paths (#2173)

Notes:

* `_windows_write_string`: Fixed in 3.6
  * https://bugs.python.org/issue1602
  * PEP: https://www.python.org/dev/peps/pep-0528

* Windows UTF-8 fix: Fixed in 3.3
  * https://bugs.python.org/issue13216

* `__loader__`: is always present in 3.3+
  * https://bugs.python.org/issue14646

* `workaround_optparse_bug9161`: Fixed in 2.7
  * https://bugs.python.org/issue9161

Authored by: fstirlitz

											
										
										
											2021-12-30 12:23:36 +00:00
+								    tf = tempfile.NamedTemporaryFile(
 								        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
 								        suffix='.tmp', delete=False, mode='w', encoding='utf-8')
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											2014-08-21 11:01:13 +00:00
 								    try:
 								        with tf:
-												Allow unicode characters in `info.json`

Closes #2139

											
										
										
											2021-12-27 22:51:13 +00:00
+								            json.dump(obj, tf, ensure_ascii=False)
-												Fix "ERROR: Cannot write metadata to JSON file" on Windows

Fixes #4246

											
										
										
											2014-11-20 06:05:39 +00:00
+								        if sys.platform == 'win32':
 								            # Need to remove existing file on Windows, else os.rename raises
 								            # WindowsError or FileExistsError.
-												[cleanup] Misc cleanup and refactor (#2173)

											
										
										
											2022-04-17 20:58:28 +00:00
+								            with contextlib.suppress(OSError):
-												Fix "ERROR: Cannot write metadata to JSON file" on Windows

Fixes #4246

											
										
										
											2014-11-20 06:05:39 +00:00
+								                os.unlink(fn)
-												[cleanup] Misc cleanup and refactor (#2173)

											
										
										
											2022-04-17 20:58:28 +00:00
+								        with contextlib.suppress(OSError):
-												[utils] Fix file permissions in write_json_file (closes #12471) (#25122)


											
										
										
											2020-05-19 20:21:52 +00:00
+								            mask = os.umask(0)
 								            os.umask(mask)
 								            os.chmod(tf.name, 0o666 & ~mask)
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											2014-08-21 11:01:13 +00:00
+								        os.rename(tf.name, fn)
-												Don't use bare 'except:'

They catch any exception, including KeyboardInterrupt, we don't want to catch it.

											
										
										
											2015-03-27 12:02:20 +00:00
+								    except Exception:
-												[cleanup] Misc cleanup and refactor (#2173)

											
										
										
											2022-04-17 20:58:28 +00:00
+								        with contextlib.suppress(OSError):
-												[utils] Make JSON file writes atomic (Fixes #3549)

											
										
										
											2014-08-21 11:01:13 +00:00
+								            os.remove(tf.name)
 								        raise
-												[cleanup] Remove unused code paths (#2173)

Notes:

* `_windows_write_string`: Fixed in 3.6
  * https://bugs.python.org/issue1602
  * PEP: https://www.python.org/dev/peps/pep-0528

* Windows UTF-8 fix: Fixed in 3.3
  * https://bugs.python.org/issue13216

* `__loader__`: is always present in 3.3+
  * https://bugs.python.org/issue14646

* `workaround_optparse_bug9161`: Fixed in 2.7
  * https://bugs.python.org/issue9161

Authored by: fstirlitz

											
										
										
											2021-12-30 12:23:36 +00:00
+								def find_xpath_attr(node, xpath, key, val=None):
 								    """ Find the xpath xpath[@key=val] """
 								    assert re.match(r'^[a-zA-Z_-]+$', key)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    expr = xpath + (f'[@{key}]' if val is None else f"[@{key}='{val}']")
-												[cleanup] Remove unused code paths (#2173)

Notes:

* `_windows_write_string`: Fixed in 3.6
  * https://bugs.python.org/issue1602
  * PEP: https://www.python.org/dev/peps/pep-0528

* Windows UTF-8 fix: Fixed in 3.3
  * https://bugs.python.org/issue13216

* `__loader__`: is always present in 3.3+
  * https://bugs.python.org/issue14646

* `workaround_optparse_bug9161`: Fixed in 2.7
  * https://bugs.python.org/issue9161

Authored by: fstirlitz

											
										
										
											2021-12-30 12:23:36 +00:00
+								    return node.find(expr)
-												Add helper function find_path_attr

											
										
										
											2013-07-11 14:12:08 +00:00
-												Add an extractor for internetvideoarchive.com videos

It's used by videodetective.com

											
										
										
											2013-10-12 19:34:04 +00:00
+								# On python2.6 the xml.etree.ElementTree.Element methods don't support
 								# the namespace parameter
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												Add an extractor for internetvideoarchive.com videos

It's used by videodetective.com

											
										
										
											2013-10-12 19:34:04 +00:00
+								def xpath_with_ns(path, ns_map):
 								    components = [c.split(':') for c in path.split('/')]
 								    replaced = []
 								    for c in components:
 								        if len(c) == 1:
 								            replaced.append(c[0])
 								        else:
 								            ns, tag = c
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            replaced.append(f'{{{ns_map[ns]}}}{tag}')
-												Add an extractor for internetvideoarchive.com videos

It's used by videodetective.com

											
										
										
											2013-10-12 19:34:04 +00:00
+								    return '/'.join(replaced)
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												[utils] Add xpath_element and xpath_attr

											
										
										
											2015-09-04 17:56:45 +00:00
+								def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
-												[utils] Support list of xpath in xpath_element

											
										
										
											2015-10-31 16:39:44 +00:00
+								    def _find_xpath(xpath):
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								        return node.find(xpath)
-												[utils] Support list of xpath in xpath_element

											
										
										
											2015-10-31 16:39:44 +00:00
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    if isinstance(xpath, str):
-												[utils] Support list of xpath in xpath_element

											
										
										
											2015-10-31 16:39:44 +00:00
+								        n = _find_xpath(xpath)
 								    else:
 								        for xp in xpath:
 								            n = _find_xpath(xp)
 								            if n is not None:
 								                break
-												[utils] Apply 2.6 xpath craziness

This fixes ARD on 2.6

											
										
										
											2014-09-13 07:11:14 +00:00
-												[utils] Improve xpath_text

											
										
										
											2015-09-04 18:34:49 +00:00
+								    if n is None:
-												[utils] Add default value for xpath_text

											
										
										
											2015-06-28 16:56:07 +00:00
+								        if default is not NO_DEFAULT:
 								            return default
 								        elif fatal:
-												[ard] Make more robust against missing thumbnails

I cannot reproduce this error, it's from travis.

											
										
										
											2014-09-13 07:09:55 +00:00
+								            name = xpath if name is None else name
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            raise ExtractorError(f'Could not find XML element {name}')
-												[ard] Make more robust against missing thumbnails

I cannot reproduce this error, it's from travis.

											
										
										
											2014-09-13 07:09:55 +00:00
+								        else:
 								            return None
-												[utils] Add xpath_element and xpath_attr

											
										
										
											2015-09-04 17:56:45 +00:00
+								    return n
 								def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
-												[utils] Improve xpath_text

											
										
										
											2015-09-04 18:34:49 +00:00
+								    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 								    if n is None or n == default:
 								        return n
 								    if n.text is None:
 								        if default is not NO_DEFAULT:
 								            return default
 								        elif fatal:
 								            name = xpath if name is None else name
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            raise ExtractorError(f'Could not find XML element\'s text {name}')
-												[utils] Improve xpath_text

											
										
										
											2015-09-04 18:34:49 +00:00
+								        else:
 								            return None
 								    return n.text
-												[utils] Add xpath_element and xpath_attr

											
										
										
											2015-09-04 17:56:45 +00:00
 								def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 								    n = find_xpath_attr(node, xpath, key)
 								    if n is None:
 								        if default is not NO_DEFAULT:
 								            return default
 								        elif fatal:
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								            name = f'{xpath}[@{key}]' if name is None else name
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            raise ExtractorError(f'Could not find XML attribute {name}')
-												[utils] Add xpath_element and xpath_attr

											
										
										
											2015-09-04 17:56:45 +00:00
+								        else:
 								            return None
 								    return n.attrib[key]
-												[ard] Make more robust against missing thumbnails

I cannot reproduce this error, it's from travis.

											
										
										
											2014-09-13 07:09:55 +00:00
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
+								def get_element_by_id(id, html, **kwargs):
-												Refactor IDParser to search for elements by any attribute not just ID

											
										
										
											2012-12-19 14:21:14 +00:00
+								    """Return the content of the tag with the specified ID in the passed HTML document"""
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
+								    return get_element_by_attribute('id', id, html, **kwargs)
-												Refactor IDParser to search for elements by any attribute not just ID

											
										
										
											2012-12-19 14:21:14 +00:00
-												[utils] Remove unused get_meta_content function

											
										
										
											2014-11-04 22:20:39 +00:00
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
def get_element_html_by_id(id, html, **kwargs):
    """
    Return the html of the tag with the specified ID in the passed HTML document

    The lookup is done on the `id` attribute; any extra keyword arguments are
    handed to get_element_html_by_attribute untouched.
    """
    element_html = get_element_html_by_attribute('id', id, html, **kwargs)
    return element_html
-												[utils] Improve parsing for nested HTML elements (#2129)

and add functions to return the HTML of elements

Authored by: zmousm
											
										
										
											2022-01-05 18:37:49 +00:00
-												[utils] Add get_element_by_class

For #9950

											
										
										
											2016-07-06 12:02:52 +00:00
def get_element_by_class(class_name, html):
    """
    Return the content of the first tag with the specified class in the passed HTML document

    Returns None when no tag carries the class.
    """
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
-												[utils] Improve parsing for nested HTML elements (#2129)

and add functions to return the HTML of elements

Authored by: zmousm
											
										
										
											2022-01-05 18:37:49 +00:00
def get_element_html_by_class(class_name, html):
    """
    Return the html of the first tag with the specified class in the passed HTML document

    Returns None when no tag carries the class.
    """
    matches = get_elements_html_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
def get_element_by_attribute(attribute, value, html, **kwargs):
    """
    Return the content of the first tag with the specified attribute value,
    or None if get_elements_by_attribute finds no such tag
    """
    matches = get_elements_by_attribute(attribute, value, html, **kwargs)
    if not matches:
        return None
    return matches[0]
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
def get_element_html_by_attribute(attribute, value, html, **kargs):
    """
    Return the html of the first tag with the specified attribute value,
    or None if get_elements_html_by_attribute finds no such tag
    """
    matches = get_elements_html_by_attribute(attribute, value, html, **kargs)
    if not matches:
        return None
    return matches[0]
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
def get_elements_by_class(class_name, html, **kargs):
    """
    Return the content of all tags with the specified class in the passed HTML document as a list

    @param class_name   Literal class name to look for (it is regex-escaped here)
    @param html         HTML document to search
    @param kargs        Extra keyword arguments, forwarded to get_elements_by_attribute
                        (previously they were accepted but silently dropped).
                        `escape_value` is always forced to False, because the value
                        passed on is a hand-built regex that must not be escaped again
    """
    # Match class_name as one whole token of a (possibly multi-class) attribute value
    class_re = rf'[^\'"]*(?<=[\'"\s]){re.escape(class_name)}(?=[\'"\s])[^\'"]*'
    return get_elements_by_attribute(
        'class', class_re, html, **{**kargs, 'escape_value': False})
-												[utils] Improve parsing for nested HTML elements (#2129)

and add functions to return the HTML of elements

Authored by: zmousm
											
										
										
											2022-01-05 18:37:49 +00:00
def get_elements_html_by_class(class_name, html):
    """
    Return the html of all tags with the specified class in the passed HTML document as a list

    The class name is regex-escaped and matched as one token of the `class`
    attribute value, so the attribute lookup is told not to escape it again.
    """
    class_re = rf'[^\'"]*(?<=[\'"\s]){re.escape(class_name)}(?=[\'"\s])[^\'"]*'
    return get_elements_html_by_attribute('class', class_re, html, escape_value=False)
 								def get_elements_by_attribute(*args, **kwargs):
-												Refactor IDParser to search for elements by any attribute not just ID

											
										
										
											2012-12-19 14:21:14 +00:00
+								    """Return the content of the tag with the specified attribute in the passed HTML document"""
-												[utils] Improve parsing for nested HTML elements (#2129)

and add functions to return the HTML of elements

Authored by: zmousm
											
										
										
											2022-01-05 18:37:49 +00:00
+								    return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)]
 								def get_elements_html_by_attribute(*args, **kwargs):
 								    """Return the html of the tag with the specified attribute in the passed HTML document"""
 								    return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
-												[extractor/wordpress:mb.miniAudioPlayer] Add embed extractor (#5087)

Closes https://github.com/yt-dlp/yt-dlp/issues/4994

Authored by: coletdjnz
											
										
										
											2022-10-09 05:55:26 +00:00
def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
    """
    Yield a (text, html) pair for each tag with the specified attribute in the
    passed HTML document, where text is the element's content and html is the
    whole element

    @param attribute     Attribute name (always regex-escaped)
    @param value         Attribute value; nothing is yielded if it is falsy
    @param html          HTML document to search
    @param tag           Regex the tag name has to match
    @param escape_value  Whether `value` is a literal (True) or already a regex (False)
    """
    if not value:
        return

    # The quote around the value is mandatory only when the raw value begins with
    # whitespace, a quote, a backtick, "=", "<" or ">"; otherwise it is optional
    if re.match(r'''[\s"'`=<>]''', value):
        quote = ''
    else:
        quote = '?'

    if escape_value:
        value = re.escape(value)

    partial_element_re = rf'''(?x)
        <(?P<tag>{tag})
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for match in re.finditer(partial_element_re, html):
        # Only the opening tag was matched; let the tag parser find the matching end
        content, whole = get_element_text_and_html_by_tag(match.group('tag'), html[match.start():])
        # Drop one pair of quotes wrapping the whole content, if present
        unquoted = re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)
        yield unescapeHTML(unquoted), whole
-												[ustream] Simplify channel extraction

the ChannelParser has been moved to a new function in utils get_meta_content
Instead of the SocialStreamParser now it uses a regex

											
										
										
											2013-09-13 20:05:29 +00:00
-												[utils] PEP 8

											
										
										
											2016-03-16 15:50:04 +00:00
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser that tracks open tags on a stack and raises
    HTMLBreakOnClosingTagException as soon as the closing tag of the very first
    opening tag it saw has been reached. It can be used as a context manager.
    """

    class HTMLBreakOnClosingTagException(Exception):
        pass

    def __init__(self):
        super().__init__()
        self.tagstack = collections.deque()

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag leaves by raising HTMLBreakOnClosingTagException, so data
        # is still buffered at that point. That data is of no further interest,
        # hence this override which simply discards it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        stack = self.tagstack
        if not stack:
            raise compat_HTMLParseError('no tags in the stack')

        # Unwind the stack until the tag being closed is found
        matched = False
        while stack and not matched:
            matched = stack.pop() == tag
        if not matched:
            raise compat_HTMLParseError(f'matching opening {tag} tag for closing tag not found'.replace(
                f'matching opening {tag} tag for closing tag', f'matching opening tag for closing {tag} tag'))

        # An empty stack means the first opening tag has just been closed
        if not stack:
            raise self.HTMLBreakOnClosingTagException
-												[utils] Improve parsing for nested HTML elements (#2129)

and add functions to return the HTML of elements

Authored by: zmousm
											
										
										
											2022-01-05 18:37:49 +00:00
-												[cleanup] Lint and misc cleanup

											
										
										
											2022-11-06 20:59:58 +00:00
+								# XXX: This should be far less strict
-												[utils] Improve parsing for nested HTML elements (#2129)

and add functions to return the HTML of elements

Authored by: zmousm
											
										
										
											2022-01-05 18:37:49 +00:00
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return a tuple of its content (text) and the whole element (html)

    Raises compat_HTMLParseError if the opening tag or its matching closing
    tag cannot be found.
    """
    def index_from(needle, start, exc):
        # Absolute position of needle at or after start, or raise the given error
        try:
            return html.index(needle, start)
        except ValueError:
            raise exc

    closing_tag = f'</{tag}>'
    whole_start = index_from(
        f'<{tag}', 0, compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = 1 + index_from(
        '>', whole_start, compat_HTMLParseError(f'malformed opening {tag} tag'))

    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')

        # Feed the document one candidate closing tag at a time; the parser
        # signals via exception once the closing tag balances the first opening tag
        offset = content_start
        while offset < len(html):
            closing_start = index_from(
                closing_tag, offset,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            closing_end = closing_start + len(closing_tag)
            try:
                parser.feed(html[offset:closing_end])
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:closing_start], html[whole_start:closing_end]
            offset = closing_end
        raise compat_HTMLParseError('unexpected end of html')
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
class HTMLAttributeParser(html.parser.HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        super().__init__()
        self.attrs = {}

    def handle_starttag(self, tag, attrs):
        self.attrs = {name: value for name, value in attrs}
        # Only the first start tag is of interest, so abort parsing right here
        raise compat_HTMLParseError('done')
-												[utils] Add extract_attributes for extracting html tag attributes

This is much more robust than just using regexps, and handles all
the common scenarios, such as empty/no values, repeated attributes,
entity decoding, mixed case names, and the different possible value
quoting schemes.

											
										
										
											2016-01-02 19:49:59 +00:00
-												[utils] PEP 8

											
										
										
											2016-03-16 15:50:04 +00:00
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
class HTMLListAttrsParser(html.parser.HTMLParser):
    """
    HTML parser to gather the attributes for the elements of a list

    After feeding, `items` holds one attribute dict per <li> start tag seen at
    nesting depth 0; <li> tags nested inside other elements are ignored.
    """

    # Void elements never have a closing tag. Counting their start tag would
    # raise the nesting depth for good and hide every following top-level <li>
    _VOID_ELEMENTS = frozenset((
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
        'link', 'meta', 'source', 'track', 'wbr'))

    def __init__(self):
        html.parser.HTMLParser.__init__(self)
        self.items = []
        self._level = 0  # current element nesting depth

    def handle_starttag(self, tag, attrs):
        if tag in self._VOID_ELEMENTS:
            return
        if tag == 'li' and self._level == 0:
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        # Also reached for self-closing void tags (`<br/>`) and stray `</br>`;
        # these were never counted, so they must not be uncounted either
        if tag in self._VOID_ELEMENTS:
            return
        self._level -= 1
-												[utils] Add extract_attributes for extracting html tag attributes

This is much more robust than just using regexps, and handles all
the common scenarios, such as empty/no values, repeated attributes,
entity decoding, mixed case names, and the different possible value
quoting schemes.

											
										
										
											2016-01-02 19:49:59 +00:00
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&#98;az" d=boz
         empty="" noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # A parse error just ends parsing; whatever was gathered is returned
        pass
    return parser.attrs
-												merged unescapeHTML branch; removed lxml dependency

											
										
										
											2012-04-10 22:22:51 +00:00
-												[utils] PEP 8

											
										
										
											2016-03-16 15:50:04 +00:00
-												[RaiplayRadio] Add extractors (#780)

Original PR: https://github.com/ytdl-org/youtube-dl/pull/21837
Authored by: frafra
											
										
										
											2021-11-05 16:54:56 +00:00
def parse_list(webpage):
    """Given a string for a series of HTML <li> elements,
    return the `items` list collected by HTMLListAttrsParser
    (one attribute dictionary per gathered <li>)"""
    list_parser = HTMLListAttrsParser()
    list_parser.feed(webpage)
    list_parser.close()
    return list_parser.items
-												merged unescapeHTML branch; removed lxml dependency

											
										
										
											2012-04-10 22:22:51 +00:00
def clean_html(html):
    """
    Clean an HTML snippet into a readable string

    None is passed through unchanged, so optional fields can be cleaned
    without checking them first.
    """
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Applied strictly in this order
    substitutions = (
        (r'\s+', ' '),  # collapse every run of whitespace
        (r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n'),  # <br> becomes a newline
        (r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n'),  # so does a paragraph boundary
        ('<.*?>', ''),  # strip html tags
    )
    for pattern, replacement in substitutions:
        html = re.sub(pattern, replacement, html)

    # Replace html entities
    return unescapeHTML(html).strip()
-												merged unescapeHTML branch; removed lxml dependency

											
										
										
											2012-04-10 22:22:51 +00:00
-												[extractor] Add `_search_json`

All fetching of JSON objects should eventually be done with this function
but only `youtube` is being refactored for now

											
										
										
											2022-06-03 15:32:31 +00:00
class LenientJSONDecoder(json.JSONDecoder):
    """
    JSON decoder that tolerates some kinds of malformed input

    @param transform_source  Optional callable applied to the source string before decoding
    @param ignore_extra      Decode only the first JSON value and ignore whatever follows it
    @param close_objects     How many unclosed objects/arrays to try to close. Closing one
                             can take two attempts (append a comma to learn what is
                             expected, then swap it for the closing bracket), hence
                             2 * close_objects repair attempts are allowed

    Remaining positional/keyword arguments are passed to json.JSONDecoder.
    """
    # TODO: Write tests

    def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        self._close_attempts = 2 * close_objects
        super().__init__(*args, **kwargs)

    @staticmethod
    def _close_object(err):
        """
        Return a repaired copy of the document, cut at the error position of
        the given JSONDecodeError, or None if no repair is known for it
        """
        doc = err.doc[:err.pos]
        # We need to add comma first to get the correct error message
        if err.msg.startswith('Expecting \',\''):
            return doc + ','
        elif not doc.endswith(','):
            return None

        if err.msg.startswith('Expecting property name'):
            return doc[:-1] + '}'
        elif err.msg.startswith('Expecting value'):
            return doc[:-1] + ']'
        return None

    def decode(self, s):
        """
        Decode the JSON document s

        Raises json.JSONDecodeError (with a snippet of the offending text in
        its message) if the document cannot be decoded or repaired.
        """
        if self.transform_source:
            s = self.transform_source(s)
        for attempt in range(self._close_attempts + 1):
            try:
                if self.ignore_extra:
                    return self.raw_decode(s.lstrip())[0]
                return super().decode(s)
            except json.JSONDecodeError as e:
                if e.pos is None:
                    raise
                if attempt < self._close_attempts:
                    # Do not overwrite s unless the repair worked; it used to become
                    # None here, turning the error below into a TypeError
                    repaired = self._close_object(e)
                    if repaired is not None:
                        s = repaired
                        continue
                # e.pos indexes e.doc (which is left-stripped with ignore_extra), so
                # quote from that; clamp the start so it cannot wrap to the string end
                snippet = e.doc[max(0, e.pos - 10):e.pos + 10]
                raise type(e)(f'{e.msg} in {snippet!r}', e.doc, e.pos)
        # Unreachable: the final iteration always returns or raises
        raise AssertionError('Too many attempts to decode JSON')
-												[extractor] Add `_search_json`

All fetching of JSON objects should eventually be done with this function
but only `youtube` is being refactored for now

											
										
										
											2022-06-03 15:32:31 +00:00
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
+								def sanitize_open(filename, open_mode):
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    """Try to open the given filename, and slightly tweak it if this fails.
 								    Attempts to open the given filename. If this fails, it tries to change
 								    the filename slightly, step by step, until it's either able to open it
 								    or it fails and raises a final exception, like the standard open()
 								    function.
 								    It returns the tuple (stream, definitive_file_name).
 								    """
-												Do not prevent download if locking is unsupported

Closes #3022

Failure to lock download-archive is still fatal.
This is consistent with youtube-dl's behavior

											
										
										
											2022-04-05 17:38:18 +00:00
+								    if filename == '-':
 								        if sys.platform == 'win32':
 								            import msvcrt
-												Standardize retry mechanism (#1649)

* [utils] Create `RetryManager`
* Migrate all retries to use the manager
* [extractor] Add wrapper methods for convenience
* Standardize console messages for retries
* Add `--retry-sleep` for extractors
											
										
										
											2022-08-01 20:13:18 +00:00
-												[docs] Consistent use of `e.g.` (#4643)

Authored by: Lesmiscore
											
										
										
											2022-08-14 12:04:13 +00:00
+								            # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
-												[utils] sanitize_open: Allow any IO stream as stdout

Fixes: https://github.com/yt-dlp/yt-dlp/issues/3298#issuecomment-1181754989

											
										
										
											2022-07-30 22:01:20 +00:00
+								            with contextlib.suppress(io.UnsupportedOperation):
 								                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
-												Do not prevent download if locking is unsupported

Closes #3022

Failure to lock download-archive is still fatal.
This is consistent with youtube-dl's behavior

											
										
										
											2022-04-05 17:38:18 +00:00
+								        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
-												Do not prevent download if locking is unsupported

Closes #3022

Failure to lock download-archive is still fatal.
This is consistent with youtube-dl's behavior

											
										
										
											2022-04-05 17:38:18 +00:00
+								    for attempt in range(2):
 								        try:
 								            try:
-												Do not lock downloading file on Windows

Closes #3124

											
										
										
											2022-04-05 17:45:17 +00:00
+								                if sys.platform == 'win32':
-												[test] Add `test_locked_file`

											
										
										
											2022-04-07 06:00:46 +00:00
+								                    # FIXME: An exclusive lock also locks the file from being read.
 								                    # Since windows locks are mandatory, don't lock the file on windows (for now).
 								                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                    raise LockingUnsupportedError
-												Do not prevent download if locking is unsupported

Closes #3022

Failure to lock download-archive is still fatal.
This is consistent with youtube-dl's behavior

											
										
										
											2022-04-05 17:38:18 +00:00
+								                stream = locked_file(filename, open_mode, block=False).__enter__()
-												[cleanup] Misc fixes and cleanup

Closes #3780, Closes #3853, Closes #3850

											
										
										
											2022-05-26 23:06:23 +00:00
+								            except OSError:
-												Do not prevent download if locking is unsupported

Closes #3022

Failure to lock download-archive is still fatal.
This is consistent with youtube-dl's behavior

											
										
										
											2022-04-05 17:38:18 +00:00
+								                stream = open(filename, open_mode)
-												[cleanup] Misc fixes and cleanup

Closes #3780, Closes #3853, Closes #3850

											
										
										
											2022-05-26 23:06:23 +00:00
+								            return stream, filename
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								        except OSError as err:
-												Do not prevent download if locking is unsupported

Closes #3022

Failure to lock download-archive is still fatal.
This is consistent with youtube-dl's behavior

											
										
										
											2022-04-05 17:38:18 +00:00
+								            if attempt or err.errno in (errno.EACCES,):
 								                raise
 								            old_filename, filename = filename, sanitize_path(filename)
 								            if old_filename == filename:
 								                raise
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
 								def timeconvert(timestr):
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    """Convert RFC 2822 defined time string into system timestamp"""
 								    timestamp = None
 								    timetuple = email.utils.parsedate_tz(timestr)
 								    if timetuple is not None:
 								        timestamp = email.utils.mktime_tz(timetuple)
 								    return timestamp
-												New optoin --restrict-filenames

											
										
										
											2012-11-26 22:58:46 +00:00
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[outtmpl] Limit changes during sanitization

Closes #2761

											
										
										
											2022-03-27 04:34:04 +00:00
def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Make a string usable as part of a filename.

    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    new_rules = is_id is NO_DEFAULT

    def _substitute(ch):
        # A "\0" prefix tags a substitute character so that runs of
        # substitutes can be collapsed and trimmed further below
        if restricted and ch in ACCENT_CHARS:
            return ACCENT_CHARS[ch]
        if not restricted and ch == '\n':
            return '\0 '
        if new_rules and not restricted and ch in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(ch, chr(ord(ch) + 0xfee0))
        if ch == '?' or ord(ch) < 32 or ord(ch) == 127:
            return ''
        if ch == '"':
            return '' if restricted else '\''
        if ch == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        if ch in '\\/|*<>':
            return '\0_'
        if restricted and (ch in '!&\'()[]{}$;`^,#' or ch.isspace() or ord(ch) > 127):
            return '' if unicodedata.category(ch)[0] in 'CM' else '\0_'
        return ch

    # Replace look-alike Unicode glyphs
    if restricted and (new_rules or not is_id):
        s = unicodedata.normalize('NFKC', s)
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(_substitute(ch) for ch in s)
    if new_rules:
        # Remove repeated substitute chars
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)
        STRIP_RE = r'(?:\0.|[ _-])*'
        # Remove substitute chars from start/end
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)
    result = result.replace('\0', '') or '_'

    if is_id:
        return result

    # Collapse runs of underscores, then drop them from both ends
    result = re.sub(r'__+', '_', result).strip('_')
    # Common case of "Foreign band name - English song title"
    if restricted and result.startswith('-_'):
        result = result[2:]
    # Prevent a hyphen at the beginning of the filename
    if result.startswith('-'):
        result = '_' + result[1:]
    return result.lstrip('.') or '_'
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												Option `--windows-filenames` to force use of windows compatible filenames

* Also changed `--trim-file-name` to `--trim-filenames` to be similar to related options

Related: https://web.archive.org/web/20210217190806/https://old.reddit.com/r/youtubedl/comments/llc4o5/do_you_guys_also_have_this_error

:ci skip dl

											
										
										
											2021-02-17 19:09:38 +00:00
def sanitize_path(s, force=False):
    """Sanitize and normalize a path the way Windows requires.

    On non-Windows platforms the path is returned untouched unless `force`
    is set; on Windows `force` is ignored and the drive/UNC prefix is kept.
    """
    # XXX: this handles drive relative paths (c:sth) incorrectly
    is_windows = sys.platform == 'win32'
    if not (is_windows or force):
        return s

    drive_or_unc = os.path.splitdrive(s)[0] if is_windows else ''

    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        del parts[0]

    cleaned = []
    for part in parts:
        if part not in ('.', '..'):
            part = re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part)
        cleaned.append(part)

    if drive_or_unc:
        cleaned.insert(0, drive_or_unc + os.path.sep)
    elif not is_windows and s.startswith(os.path.sep):
        # Forced sanitization of an absolute UNIX path: keep the leading separator
        cleaned.insert(0, os.path.sep)
    # TODO: Fix behavioral differences <3.12
    # The workaround using `normpath` only superficially passes tests
    # Ref: https://github.com/python/cpython/pull/100351
    return os.path.normpath(os.path.join(*cleaned))
-												[utils] Add sanitize_path

											
										
										
											2015-03-08 14:55:22 +00:00
-												[extractor] Framework for embed detection (#4307)

											
										
										
											2022-08-01 01:22:03 +00:00
def sanitize_url(url, *, scheme='http'):
    """Repair protocol-less URLs and a few frequently seen scheme typos.

    @param url     URL (or URL-like string) to fix up; None is passed through
    @param scheme  Scheme to prepend to protocol-less (`//host/...`) URLs
    """
    if url is None:
        return None
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return f'{scheme}:{url}'

    # Fix some common typos seen so far
    typo_fixes = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for pattern, replacement in typo_fixes:
        fixed, count = re.subn(pattern, replacement, url)
        if count:
            return fixed
    return url
-												[utils] Extract sanitize_url routine

											
										
										
											2016-03-26 13:33:57 +00:00
-												Handle Basic Auth `user:pass` in URLs

Fixes https://github.com/ytdl-org/youtube-dl/issues/20258, https://github.com/ytdl-org/youtube-dl/issues/26211
Authored by: hhirtz, pukkandan

											
										
										
											2021-04-19 12:07:45 +00:00
def extract_basic_auth(url):
    """Split HTTP Basic Auth credentials (`user:pass@`) out of a URL.

    @param url  URL that may contain a userinfo component
    @returns    (url, auth_header): the URL with the userinfo removed and the
                corresponding `Basic ...` Authorization header value, or
                (url, None) with the URL unchanged when it has no userinfo
    """
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None

    host = parts.hostname
    # `hostname` drops the brackets around IPv6 literals; restore them,
    # otherwise the rebuilt netloc (e.g. `::1:8080`) is not a valid authority
    if host and ':' in host:
        host = f'[{host}]'
    netloc = host if parts.port is None else f'{host}:{parts.port}'
    url = urllib.parse.urlunsplit(parts._replace(netloc=netloc))

    # A missing password is encoded as an empty string
    auth_payload = base64.b64encode(
        ('{}:{}'.format(parts.username, parts.password or '')).encode())
    return url, f'Basic {auth_payload.decode()}'
-												Handle Basic Auth `user:pass` in URLs

Fixes https://github.com/ytdl-org/youtube-dl/issues/20258, https://github.com/ytdl-org/youtube-dl/issues/26211
Authored by: hhirtz, pukkandan

											
										
										
											2021-04-19 12:07:45 +00:00
-												[utils] Introduce expand_path

											
										
										
											2017-03-25 19:30:10 +00:00
def expand_path(s):
    """Expand ~ and then shell variables in the given path"""
    home_expanded = compat_expanduser(s)
    return os.path.expandvars(home_expanded)
-												Add option `--lazy-playlist` to process entries as they are received

											
										
										
											2022-06-17 08:05:04 +00:00
def orderedSet(iterable, *, lazy=False):
    """Drop duplicate items from the iterable, keeping first occurrences in order.

    @param lazy  Return a generator instead of a list
    """
    def _unique():
        # A list (not a set) is used for bookkeeping since the items can be unhashable
        emitted = []
        for item in iterable:
            if item in emitted:
                continue
            emitted.append(item)
            yield item

    unique = _unique()
    if lazy:
        return unique
    return list(unique)
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												[instagram] Fix info_dict key name

											
										
										
											2014-03-24 00:40:09 +00:00
-												[utils] Decode HTML5 entities

Used in test_Vporn_1. Also related to #9270

											
										
										
											2016-06-10 07:11:55 +00:00
+								def _htmlentity_transform(entity_with_semicolon):
-												[utils] Modernize tests

											
										
										
											2014-08-27 17:11:45 +00:00
+								    """Transforms an HTML entity to a character."""
-												[utils] Decode HTML5 entities

Used in test_Vporn_1. Also related to #9270

											
										
										
											2016-06-10 07:11:55 +00:00
+								    entity = entity_with_semicolon[:-1]
-												[utils] Modernize tests

											
										
										
											2014-08-27 17:11:45 +00:00
+								    # Known non-numeric HTML entity
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
+								    if entity in html.entities.name2codepoint:
 								        return chr(html.entities.name2codepoint[entity])
-												[utils] Modernize tests

											
										
										
											2014-08-27 17:11:45 +00:00
-												[docs] Consistent use of `e.g.` (#4643)

Authored by: Lesmiscore
											
										
										
											2022-08-14 12:04:13 +00:00
+								    # TODO: HTML5 allows entities without a semicolon.
 								    # E.g. '&Eacuteric' should be decoded as 'Éric'.
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
+								    if entity_with_semicolon in html.entities.html5:
 								        return html.entities.html5[entity_with_semicolon]
-												[utils] Decode HTML5 entities

Used in test_Vporn_1. Also related to #9270

											
										
										
											2016-06-10 07:11:55 +00:00
-												[utils] Escape all HTML entities written in hexadecimal form

											
										
										
											2015-03-26 15:15:27 +00:00
+								    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
-												[utils] Modernize tests

											
										
										
											2014-08-27 17:11:45 +00:00
+								    if mobj is not None:
 								        numstr = mobj.group(1)
-												[utils] Remove stray u'

											
										
										
											2014-11-17 06:16:12 +00:00
+								        if numstr.startswith('x'):
-												[utils] Modernize tests

											
										
										
											2014-08-27 17:11:45 +00:00
+								            base = 16
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            numstr = f'0{numstr}'
-												[utils] Modernize tests

											
										
										
											2014-08-27 17:11:45 +00:00
+								        else:
 								            base = 10
-												Start moving to ytdl-org

											
										
										
											2019-03-09 12:14:41 +00:00
+								        # See https://github.com/ytdl-org/youtube-dl/issues/7518
-												[cleanup] Misc cleanup and refactor (#2173)

											
										
										
											2022-04-17 20:58:28 +00:00
+								        with contextlib.suppress(ValueError):
-												[compat] Remove more functions

Removing any more will require changes to a large number of extractors

											
										
										
											2022-06-24 08:10:17 +00:00
+								            return chr(int(numstr, base))
-												[utils] Modernize tests

											
										
										
											2014-08-27 17:11:45 +00:00
 								    # Unknown entity in name, return its literal representation
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    return f'&{entity};'
-												[utils] Modernize tests

											
										
										
											2014-08-27 17:11:45 +00:00
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
def unescapeHTML(s):
    """Decode the `&...;` HTML entities in a string; None is passed through."""
    if s is None:
        return None
    assert isinstance(s, str)

    def _decode(match):
        # Group 1 is the entity without the leading "&" but with the ";"
        return _htmlentity_transform(match.group(1))

    return re.sub(r'&([^&;]+;)', _decode, s)
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												Fix/work around Windows encoding issues (Fixes #2095)

											
										
										
											2014-01-05 02:07:55 +00:00
-												[downloader/mhtml] Add new downloader (#343)

This downloader is intended to be used for streams that consist of a
timed sequence of stand-alone images, such as slideshows or thumbnail
streams

This can be used for implementing:

https://github.com/ytdl-org/youtube-dl/issues/4974#issue-58006762
https://github.com/ytdl-org/youtube-dl/issues/4540#issuecomment-69574231
https://github.com/ytdl-org/youtube-dl/pull/11185#issuecomment-335554239

https://github.com/ytdl-org/youtube-dl/issues/9868
https://github.com/ytdl-org/youtube-dl/pull/14951


Authored by: fstirlitz

											
										
										
											2021-05-23 16:34:49 +00:00
def escapeHTML(text):
    """Escape the characters `&`, `<`, `>`, `"` and `'` as HTML entities."""
    # "&" has to be handled first so that the entities produced by the
    # later replacements are not escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ('"', '&quot;'),
        ("'", '&#39;'),
    )
    for raw, entity in replacements:
        text = text.replace(raw, entity)
    return text
-												Add option `--netrc-cmd` (#6682)

Authored by: NDagestad, pukkandan
Closes #1706
											
										
										
											2023-06-21 03:07:42 +00:00
class netrc_from_content(netrc.netrc):
    """A `netrc.netrc` populated from a string instead of a file on disk."""

    def __init__(self, content):
        # The parent constructor is deliberately not called since it reads a file;
        # only the containers that `_parse` fills in are set up here
        self.hosts = {}
        self.macros = {}
        with io.StringIO(content) as buffer:
            self._parse('-', buffer, False)
-												Don't create console for subprocesses on Windows (#1261)

Closes #1251
											
										
										
											2021-10-20 16:19:40 +00:00
+								class Popen(subprocess.Popen):
 								    if sys.platform == 'win32':
 								        _startupinfo = subprocess.STARTUPINFO()
 								        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
 								    else:
 								        _startupinfo = None
-												Restore LD_LIBRARY_PATH when using PyInstaller (#4666)

Authored by: Lesmiscore
											
										
										
											2022-08-30 16:24:14 +00:00
+								    @staticmethod
 								    def _fix_pyinstaller_ld_path(env):
 								        """Restore LD_LIBRARY_PATH when using PyInstaller
 								            Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
 								                 https://github.com/yt-dlp/yt-dlp/issues/4573
 								        """
 								        if not hasattr(sys, '_MEIPASS'):
 								            return
 								        def _fix(key):
 								            orig = env.get(f'{key}_ORIG')
 								            if orig is None:
 								                env.pop(key, None)
 								            else:
 								                env[key] = orig
 								        _fix('LD_LIBRARY_PATH')  # Linux
 								        _fix('DYLD_LIBRARY_PATH')  # macOS
-												[core] Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)

The shell escape function is now using `""` instead of `\"`. `utils.Popen` has been patched to properly quote commands.

Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg for reference.

Authored by: Grub4K

											
										
										
											2023-09-24 00:29:01 +00:00
+								    def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
-												Restore LD_LIBRARY_PATH when using PyInstaller (#4666)

Authored by: Lesmiscore
											
										
										
											2022-08-30 16:24:14 +00:00
+								        if env is None:
 								            env = os.environ.copy()
 								        self._fix_pyinstaller_ld_path(env)
-												[utils] `Popen`: Shim undocumented `text_mode` property

Fixes #6317

Authored by: Grub4K
											
										
										
											2023-02-23 03:18:45 +00:00
+								        self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
-												[utils] `Popen`: Refactor to use contextmanager

Fixes https://github.com/yt-dlp/yt-dlp/issues/3531#issuecomment-1156223597

											
										
										
											2022-06-15 20:55:43 +00:00
+								        if text is True:
 								            kwargs['universal_newlines'] = True  # For 3.6 compatibility
 								            kwargs.setdefault('encoding', 'utf-8')
 								            kwargs.setdefault('errors', 'replace')
-												[core] Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)

The shell escape function is now using `""` instead of `\"`. `utils.Popen` has been patched to properly quote commands.

Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg for reference.

Authored by: Grub4K

											
										
										
											2023-09-24 00:29:01 +00:00
 								        if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
 								            if not isinstance(args, str):
-												[core] Prevent RCE when using `--exec` with `%q` (CVE-2024-22423)

The shell escape function now properly escapes `%`, `\\` and `\n`. `utils.Popen` as well as `%q` output template expansion have been patched accordingly.

Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p for more details.

Authored by: Grub4K

											
										
										
											2024-04-08 21:18:04 +00:00
+								                args = shell_quote(args, shell=True)
-												[core] Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)

The shell escape function is now using `""` instead of `\"`. `utils.Popen` has been patched to properly quote commands.

Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg for reference.

Authored by: Grub4K

											
										
										
											2023-09-24 00:29:01 +00:00
+								            shell = False
-												[core] Prevent RCE when using `--exec` with `%q` (CVE-2024-22423)

The shell escape function now properly escapes `%`, `\\` and `\n`. `utils.Popen` as well as `%q` output template expansion have been patched accordingly.

Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p for more details.

Authored by: Grub4K

											
										
										
											2024-04-08 21:18:04 +00:00
+								            # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
 								            env['='] = '"^\n\n"'
 								            args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'
-												[core] Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)

The shell escape function is now using `""` instead of `\"`. `utils.Popen` has been patched to properly quote commands.

Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg for reference.

Authored by: Grub4K

											
										
										
											2023-09-24 00:29:01 +00:00
 								        super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)
 								    def __comspec(self):
 								        """Return the absolute path of the Windows command interpreter.
 								        `%ComSpec%` is preferred; otherwise `cmd.exe` in the `System32` directory of `%SystemRoot%` is used.
 								        Raises FileNotFoundError if the resulting path is not absolute.
 								        """
 								        shell_path = os.environ.get('ComSpec')
 								        if not shell_path:
 								            # Fall back to the conventional location below the system root
 								            system_root = os.environ.get('SystemRoot', '')
 								            shell_path = os.path.join(system_root, 'System32', 'cmd.exe')
 								        if not os.path.isabs(shell_path):
 								            raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')
 								        return shell_path
-												Don't create console for subprocesses on Windows (#1261)

Closes #1251
											
										
										
											2021-10-20 16:19:40 +00:00
 								    def communicate_or_kill(self, *args, **kwargs):
-												[cleanup] Misc fixes and cleanup

Closes #3780, Closes #3853, Closes #3850

											
										
										
											2022-05-26 23:06:23 +00:00
+								        try:
 								            return self.communicate(*args, **kwargs)
 								        except BaseException:  # Including KeyboardInterrupt
-												[utils] `Popen`: Refactor to use contextmanager

Fixes https://github.com/yt-dlp/yt-dlp/issues/3531#issuecomment-1156223597

											
										
										
											2022-06-15 20:55:43 +00:00
+								            self.kill(timeout=None)
-												[cleanup] Misc fixes and cleanup

Closes #3780, Closes #3853, Closes #3850

											
										
										
											2022-05-26 23:06:23 +00:00
+								            raise
-												Don't create console for subprocesses on Windows (#1261)

Closes #1251
											
										
										
											2021-10-20 16:19:40 +00:00
-												[utils] `Popen`: Refactor to use contextmanager

Fixes https://github.com/yt-dlp/yt-dlp/issues/3531#issuecomment-1156223597

											
										
										
											2022-06-15 20:55:43 +00:00
+								    def kill(self, *, timeout=0):
 								        super().kill()
 								        if timeout != 0:
 								            self.wait(timeout=timeout)
 								    @classmethod
-												[jsinterp] Implement timeout

Workaround for #4716

											
										
										
											2022-08-22 00:49:06 +00:00
+								    def run(cls, *args, timeout=None, **kwargs):
-												[utils] `Popen`: Refactor to use contextmanager

Fixes https://github.com/yt-dlp/yt-dlp/issues/3531#issuecomment-1156223597

											
										
										
											2022-06-15 20:55:43 +00:00
+								        with cls(*args, **kwargs) as proc:
-												[utils] `Popen`: Shim undocumented `text_mode` property

Fixes #6317

Authored by: Grub4K
											
										
										
											2023-02-23 03:18:45 +00:00
+								            default = '' if proc.__text_mode else b''
-												[jsinterp] Implement timeout

Workaround for #4716

											
										
										
											2022-08-22 00:49:06 +00:00
+								            stdout, stderr = proc.communicate_or_kill(timeout=timeout)
-												[utils] `Popen.run`: Fix default return in binary mode

											
										
										
											2022-09-25 21:22:21 +00:00
+								            return stdout or default, stderr or default, proc.returncode
-												[utils] `Popen`: Refactor to use contextmanager

Fixes https://github.com/yt-dlp/yt-dlp/issues/3531#issuecomment-1156223597

											
										
										
											2022-06-15 20:55:43 +00:00
-												Don't create console for subprocesses on Windows (#1261)

Closes #1251
											
										
										
											2021-10-20 16:19:40 +00:00
-												[ffmpeg] Correct argument encoding on Windows with Python 2.x

Fixes #2924

											
										
										
											2014-05-16 13:47:54 +00:00
+								def encodeArgument(s):
-												[cleanup] Remove unused code paths (#2173)

Notes:

* `_windows_write_string`: Fixed in 3.6
  * https://bugs.python.org/issue1602
  * PEP: https://www.python.org/dev/peps/pep-0528

* Windows UTF-8 fix: Fixed in 3.3
  * https://bugs.python.org/issue13216

* `__loader__`: is always present in 3.3+
  * https://bugs.python.org/issue14646

* `workaround_optparse_bug9161`: Fixed in 2.7
  * https://bugs.python.org/issue9161

Authored by: fstirlitz

											
										
										
											2021-12-30 12:23:36 +00:00
+								    # Legacy code that uses byte strings
 								    # Uncomment the following line after fixing all post processors
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
-												[cleanup] Remove unused code paths (#2173)

Notes:

* `_windows_write_string`: Fixed in 3.6
  * https://bugs.python.org/issue1602
  * PEP: https://www.python.org/dev/peps/pep-0528

* Windows UTF-8 fix: Fixed in 3.3
  * https://bugs.python.org/issue13216

* `__loader__`: is always present in 3.3+
  * https://bugs.python.org/issue14646

* `workaround_optparse_bug9161`: Fixed in 2.7
  * https://bugs.python.org/issue9161

Authored by: fstirlitz

											
										
										
											2021-12-30 12:23:36 +00:00
+								    return s if isinstance(s, str) else s.decode('ascii')
-												[ffmpeg] Correct argument encoding on Windows with Python 2.x

Fixes #2924

											
										
										
											2014-05-16 13:47:54 +00:00
-												[utils] Standardize timestamp formatting code
Closes #1285

											
										
										
											2021-10-19 17:28:14 +00:00
+								_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
 								def timetuple_from_msec(msec):
 								    """Break a duration in milliseconds into a `Time(hours, minutes, seconds, milliseconds)` tuple"""
 								    whole_seconds, millis = divmod(msec, 1000)
 								    whole_minutes, seconds = divmod(whole_seconds, 60)
 								    hours, minutes = divmod(whole_minutes, 60)
 								    return _timetuple(hours=hours, minutes=minutes, seconds=seconds, milliseconds=millis)
-												[downloader/mhtml] Add new downloader (#343)

This downloader is intended to be used for streams that consist of a
timed sequence of stand-alone images, such as slideshows or thumbnail
streams

This can be used for implementing:

https://github.com/ytdl-org/youtube-dl/issues/4974#issue-58006762
https://github.com/ytdl-org/youtube-dl/issues/4540#issuecomment-69574231
https://github.com/ytdl-org/youtube-dl/pull/11185#issuecomment-335554239

https://github.com/ytdl-org/youtube-dl/issues/9868
https://github.com/ytdl-org/youtube-dl/pull/14951


Authored by: fstirlitz

											
										
										
											2021-05-23 16:34:49 +00:00
+								def formatSeconds(secs, delim=':', msec=False):
-												[utils] Standardize timestamp formatting code
Closes #1285

											
										
										
											2021-10-19 17:28:14 +00:00
+								    time = timetuple_from_msec(secs * 1000)
 								    if time.hours:
 								        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
 								    elif time.minutes:
 								        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
-												twitch.tv chapters (#810): print out start and end time

											
										
										
											2013-05-04 10:02:18 +00:00
+								    else:
-												[utils] Standardize timestamp formatting code
Closes #1285

											
										
										
											2021-10-19 17:28:14 +00:00
+								        ret = '%d' % time.seconds
 								    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
-												twitch.tv chapters (#810): print out start and end time

											
										
										
											2013-05-04 10:02:18 +00:00
-												Add new --print-traffic option

											
										
										
											2013-12-29 14:28:32 +00:00
-												[utils] Improve bug_report_message

Add an optional argument specifying the text that should go before
the message.

											
										
										
											2021-04-22 19:16:29 +00:00
+								def bug_reports_message(before=';'):
-												[cleanup, utils] Split into submodules (#7090)

Closes https://github.com/yt-dlp/yt-dlp/pull/2173

Authored by: pukkandan, coletdjnz
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
											
										
										
											2023-05-20 21:56:23 +00:00
+								    from ..update import REPOSITORY
-												[update] Expose more functionality to API

											
										
										
											2022-06-21 11:32:56 +00:00
 								    msg = (f'please report this issue on  https://github.com/{REPOSITORY}/issues?q= , '
 								           'filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U')
-												[utils] Improve bug_report_message

Add an optional argument specifying the text that should go before
the message.

											
										
										
											2021-04-22 19:16:29 +00:00
 								    before = before.rstrip()
 								    if not before or before.endswith(('.', '!', '?')):
 								        msg = msg[0].title() + msg[1:]
 								    return (before + ' ' if before else '') + msg
-												InfoExtractor._search_regex: Suggest updating when the regex is not found (suggested in #5442)

Reuse the same message from ExtractorError

											
										
										
											2015-04-17 12:55:24 +00:00
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
+								class YoutubeDLError(Exception):
 								    """Base exception for YoutubeDL errors."""
-												[cleanup] Minor improvements to error and debug messages

											
										
										
											2021-11-09 22:49:33 +00:00
+								    msg = None
 								    def __init__(self, msg=None):
 								        if msg is not None:
 								            self.msg = msg
 								        elif self.msg is None:
 								            self.msg = type(self).__name__
 								        super().__init__(self.msg)
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
 								class ExtractorError(YoutubeDLError):
-												ExtractorError for errors during extraction

											
										
										
											2013-01-01 19:27:53 +00:00
+								    """Error during info extraction."""
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[extractor] Show video id in error messages if possible

											
										
										
											2021-08-19 01:49:23 +00:00
+								    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
-												Do not show bug report for errors that are to be expected (Closes #973)

											
										
										
											2013-07-02 06:40:21 +00:00
+								        """ tb, if given, is the original traceback (so that it can be printed out).
-												Completely change project name to yt-dlp (#85)

* All modules and binary names are changed
* All documentation references changed
* yt-dlp no longer loads youtube-dlc config files
* All URLs changed to point to organization account

Co-authored-by: Pccode66
Co-authored-by: pukkandan
											
										
										
											2021-02-24 18:45:56 +00:00
+								        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
-												Do not show bug report for errors that are to be expected (Closes #973)

											
										
										
											2013-07-02 06:40:21 +00:00
+								        """
-												[networking] Add module (#2861)

No actual changes - code is only moved around

											
										
										
											2023-07-15 09:00:08 +00:00
+								        from ..networking.exceptions import network_exceptions
-												[utils] Add `network_exceptions`

											
										
										
											2021-05-04 17:06:18 +00:00
+								        if sys.exc_info()[0] in network_exceptions:
-												Do not show bug report for errors that are to be expected (Closes #973)

											
										
										
											2013-07-02 06:40:21 +00:00
+								            expected = True
-												do not ask the user to report network errors

											
										
										
											2013-06-09 09:55:08 +00:00
-												Fix doubling of `video_id` in `ExtractorError`

											
										
										
											2022-03-04 14:07:43 +00:00
+								        self.orig_msg = str(msg)
-												ExtractorError for errors during extraction

											
										
										
											2013-01-01 19:27:53 +00:00
+								        self.traceback = tb
-												[extractor] Show video id in error messages if possible

											
										
										
											2021-08-19 01:49:23 +00:00
+								        self.expected = expected
-												[addanime] improve

											
										
										
											2013-08-28 02:25:38 +00:00
+								        self.cause = cause
-												[youtube] Include video Id in common error message (Fixes #2786)

											
										
										
											2014-04-21 18:34:03 +00:00
+								        self.video_id = video_id
-												[extractor] Show video id in error messages if possible

											
										
										
											2021-08-19 01:49:23 +00:00
+								        self.ie = ie
 								        self.exc_info = sys.exc_info()  # preserve original exception
-												[utils] `ExtractorError`: Fix `exc_info`

											
										
										
											2022-06-20 07:00:02 +00:00
+								        if isinstance(self.exc_info[1], ExtractorError):
 								            self.exc_info = self.exc_info[1].exc_info
-												[utils] Make `ExtractorError` mutable

											
										
										
											2022-11-30 00:40:26 +00:00
+								        super().__init__(self.__msg)
-												[extractor] Show video id in error messages if possible

											
										
										
											2021-08-19 01:49:23 +00:00
-												[utils] Make `ExtractorError` mutable

											
										
										
											2022-11-30 00:40:26 +00:00
+								    @property
 								    def __msg(self):
 								        return ''.join((
 								            format_field(self.ie, None, '[%s] '),
 								            format_field(self.video_id, None, '%s: '),
 								            self.orig_msg,
 								            format_field(self.cause, None, ' (caused by %r)'),
 								            '' if self.expected else bug_reports_message()))
-												ExtractorError for errors during extraction

											
										
										
											2013-01-01 19:27:53 +00:00
-												Make ExtractorError usable for other causes

											
										
										
											2013-01-03 14:39:55 +00:00
+								    def format_traceback(self):
-												[utils] Better traceback for `ExtractorError`

											
										
										
											2022-03-08 06:34:49 +00:00
+								        return join_nonempty(
 								            self.traceback and ''.join(traceback.format_tb(self.traceback)),
-												[utils] ExtractorError: Fix for older python versions

Closes #2993

											
										
										
											2022-03-09 01:12:14 +00:00
+								            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
-												[utils] Better traceback for `ExtractorError`

											
										
										
											2022-03-08 06:34:49 +00:00
+								            delim='\n') or None
-												Make ExtractorError usable for other causes

											
										
										
											2013-01-03 14:39:55 +00:00
-												[utils] Make `ExtractorError` mutable

											
										
										
											2022-11-30 00:40:26 +00:00
+								    def __setattr__(self, name, value):
 								        super().__setattr__(name, value)
 								        if getattr(self, 'msg', None) and name not in ('msg', 'args'):
 								            self.msg = self.__msg or type(self).__name__
 								            self.args = (self.msg, )  # Cannot be property
-												ExtractorError for errors during extraction

											
										
										
											2013-01-01 19:27:53 +00:00
-												Add documentation about supported sites (Fixes #4503)

											
										
										
											2014-12-30 18:35:35 +00:00
+								class UnsupportedError(ExtractorError):
 								    def __init__(self, url):
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								        super().__init__(
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            f'Unsupported URL: {url}', expected=True)
-												Add documentation about supported sites (Fixes #4503)

											
										
										
											2014-12-30 18:35:35 +00:00
+								        self.url = url
-												[vimeo] Fix pro videos and player.vimeo.com urls

The old process can still be used for those videos.
Added RegexNotFoundError, which is raised by _search_regex if it can't extract the info.

											
										
										
											2013-10-23 12:38:03 +00:00
+								class RegexNotFoundError(ExtractorError):
 								    """Error when a regex didn't match"""
 								    pass
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								class GeoRestrictedError(ExtractorError):
 								    """Geographic restriction Error exception.
 								    This exception may be thrown when a video is not available from your
 								    geographic location due to geographic restrictions imposed by a website.
 								    """
-												[mtv] fix mtv.com and more(?)


											
										
										
											2020-10-09 05:06:49 +00:00
-												[extractor] Fix some errors being converted to `ExtractorError`

											
										
										
											2021-10-26 14:47:29 +00:00
+								    def __init__(self, msg, countries=None, **kwargs):
 								        kwargs['expected'] = True
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								        super().__init__(msg, **kwargs)
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        self.countries = countries
-												[youtube,twitch] Allow waiting for channels to become live

Closes #2597

											
										
										
											2022-07-26 03:53:10 +00:00
+								class UserNotLive(ExtractorError):
 								    """Error when a channel/user is not live"""
 								    def __init__(self, msg=None, **kwargs):
 								        kwargs['expected'] = True
 								        super().__init__(msg or 'The channel is not currently live', **kwargs)
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
+								class DownloadError(YoutubeDLError):
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    """Download Error exception.
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    This exception may be thrown by FileDownloader objects if they are not
 								    configured to continue on errors. They will contain the appropriate
 								    error message.
 								    """
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												Bubble up all the stack of exceptions and retry download tests on timeout errors

											
										
										
											2013-03-09 09:05:43 +00:00
+								    def __init__(self, msg, exc_info=None):
 								        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								        super().__init__(msg)
-												Bubble up all the stack of exceptions and retry download tests on timeout errors

											
										
										
											2013-03-09 09:05:43 +00:00
+								        self.exc_info = exc_info
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												Ability to load playlist infojson

* If `--no-clean-infojson` is given, the video ids are saved to/loaded from the infojson along with their playlist index
* If a video entry that was not saved is requested, we fall back to using `webpage_url` to re-extract the entries

Related: https://github.com/yt-dlp/yt-dlp/issues/190#issuecomment-804921024

											
										
										
											2021-03-23 19:45:53 +00:00
+								class EntryNotInPlaylist(YoutubeDLError):
 								    """Entry not in playlist exception.
 								    This exception will be thrown by YoutubeDL when a requested entry
 								    is not found in the playlist info_dict
 								    """
-												[cleanup] Minor improvements to error and debug messages

											
										
										
											2021-11-09 22:49:33 +00:00
+								    msg = 'Entry not found in info'
-												Ability to load playlist infojson

* If `--no-clean-infojson` is given, the video ids are saved to/loaded from the infojson along with their playlist index
* If a video entry that was not saved is requested, we fall back to using `webpage_url` to re-extract the entries

Related: https://github.com/yt-dlp/yt-dlp/issues/190#issuecomment-804921024

											
										
										
											2021-03-23 19:45:53 +00:00
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
+								class SameFileError(YoutubeDLError):
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    """Same File exception.
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    This exception will be thrown by FileDownloader objects if they detect
 								    multiple files would have to be downloaded to the same file on disk.
 								    """
-												[cleanup] Minor improvements to error and debug messages

											
										
										
											2021-11-09 22:49:33 +00:00
+								    msg = 'Fixed output name but more than one file to download'
 								    def __init__(self, filename=None):
 								        """@param filename  If given, it is appended to the message after ': '"""
 								        message = self.msg
 								        if filename is not None:
 								            message += f': {filename}'
 								            self.msg = message
 								        super().__init__(message)
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
+								class PostProcessingError(YoutubeDLError):
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    """Post Processing exception.
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    This exception may be raised by PostProcessor's .run() method to
 								    indicate an error in the postprocessing task.
 								    """
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[utils] Create `DownloadCancelled` exception
as super-class of ExistingVideoReached, RejectedVideoReached, MaxDownloadsReached

Third parties can also sub-class this to cancel the download queue from a hook

											
										
										
											2021-10-26 14:45:12 +00:00
+								class DownloadCancelled(YoutubeDLError):
 								    """ Exception raised when the download queue should be interrupted.
 								    Base class of the more specific cancellation errors; `msg` is the message used when none is given """
 								    msg = 'The download was cancelled'
-												Added option `--break-on-reject`

and modified `--break-on-existing`

											
										
										
											2021-01-13 01:01:01 +00:00
-												[utils] Create `DownloadCancelled` exception
as super-class of ExistingVideoReached, RejectedVideoReached, MaxDownloadsReached

Third parties can also sub-class this to cancel the download queue from a hook

											
										
										
											2021-10-26 14:45:12 +00:00
+								class ExistingVideoReached(DownloadCancelled):
 								    """ --break-on-existing triggered: a video that is already in the archive was encountered """
 								    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
-												Added option `--break-on-reject`

and modified `--break-on-existing`

											
										
										
											2021-01-13 01:01:01 +00:00
-												[utils] Create `DownloadCancelled` exception
as super-class of ExistingVideoReached, RejectedVideoReached, MaxDownloadsReached

Third parties can also sub-class this to cancel the download queue from a hook

											
										
										
											2021-10-26 14:45:12 +00:00
 								class RejectedVideoReached(DownloadCancelled):
-												Add option `--break-match-filters`

* Deprecates `--break-on-reject`

Closes #5962

											
										
										
											2023-03-03 19:43:05 +00:00
+								    """ --break-match-filter triggered """
 								    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
-												Add option `--throttled-rate` below which video data is re-extracted
Currently only for HTTP downloads

Closes #430, workaround for https://github.com/ytdl-org/youtube-dl/issues/29326

											
										
										
											2021-06-22 23:11:09 +00:00
-												[utils] Create `DownloadCancelled` exception
as super-class of ExistingVideoReached, RejectedVideoReached, MaxDownloadsReached

Third parties can also sub-class this to cancel the download queue from a hook

											
										
										
											2021-10-26 14:45:12 +00:00
+								class MaxDownloadsReached(DownloadCancelled):
-												Convert all tabs to 4 spaces (PEP8)

											
										
										
											2012-11-28 01:04:46 +00:00
+								    """ --max-downloads limit has been reached. """
-												[utils] Create `DownloadCancelled` exception
as super-class of ExistingVideoReached, RejectedVideoReached, MaxDownloadsReached

Third parties can also sub-class this to cancel the download queue from a hook

											
										
										
											2021-10-26 14:45:12 +00:00
+								    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
-												Option `--wait-for-video` to wait for scheduled streams

											
										
										
											2021-11-28 18:57:44 +00:00
+								class ReExtractInfo(YoutubeDLError):
 								    """ Video info needs to be re-extracted. """
 								    def __init__(self, msg, expected=False):
 								        """
 								        @param msg       Error message, passed on to YoutubeDLError
 								        @param expected  Stored unchanged as `self.expected`
 								                         NOTE(review): presumably the same meaning as `ExtractorError.expected` - confirm
 								        """
 								        super().__init__(msg)
 								        self.expected = expected
 								class ThrottledDownload(ReExtractInfo):
-												[utils] Create `DownloadCancelled` exception
as super-class of ExistingVideoReached, RejectedVideoReached, MaxDownloadsReached

Third parties can also sub-class this to cancel the download queue from a hook

											
										
										
											2021-10-26 14:45:12 +00:00
+								    """ Download speed below --throttled-rate. """
-												[cleanup] Minor improvements to error and debug messages

											
										
										
											2021-11-09 22:49:33 +00:00
+								    msg = 'The download speed is below throttle limit'
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												Fix `--throttled-rate`

											
										
										
											2021-12-02 21:22:03 +00:00
+								    def __init__(self):
 								        super().__init__(self.msg, expected=False)
-												Option `--wait-for-video` to wait for scheduled streams

											
										
										
											2021-11-28 18:57:44 +00:00
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Thrown when a video is requested in a format
    that is not available for that video.
    """

    msg = 'Unable to download video'

    def __init__(self, err=None):
        """
        @param err  Optional detail; when given it is appended to the message as ": <err>"
        """
        if err is not None:
            # Creates an instance attribute; the class-level `msg` stays untouched
            self.msg = f'{self.msg}: {err}'
        super().__init__(self.msg)
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    May be raised by FileDownloader objects when a downloaded file turns out
    smaller than what the server announced first, which indicates that the
    connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        """
        @param downloaded  Amount actually received, in bytes
        @param expected    Amount that was announced, in bytes
        """
        message = f'Downloaded {downloaded} bytes, expected {expected} bytes'
        super().__init__(message)
        # Both values are byte counts
        self.downloaded, self.expected = downloaded, expected
-												Split code as a package, compiled into an executable zip

											
										
										
											2012-03-25 01:07:37 +00:00
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
class XAttrMetadataError(YoutubeDLError):
    """Error carrying an errno-style `code`, a `msg` and a derived `reason`.

    `reason` is one of 'NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED'.
    """

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg
        # Derive the coarse reason from the code and the message text
        self.reason = self._classify(self.code, self.msg)

    @staticmethod
    def _classify(code, msg):
        """Map an error code / message pair onto one of the reason strings."""
        out_of_space = (
            code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in msg
            or 'Disk quota exceeded' in msg)
        if out_of_space:
            return 'NO_SPACE'
        if code == errno.E2BIG or 'Argument list too long' in msg:
            return 'VALUE_TOO_LONG'
        # Anything else is reported as unsupported
        return 'NOT_SUPPORTED'
-												[utils] Introduce YoutubeDLError base class for all youtube-dl exceptions

											
										
										
											2016-10-17 11:38:37 +00:00
class XAttrUnavailableError(YoutubeDLError):
    """Marker exception with no behaviour of its own.

    NOTE(review): judging by the name it reports that xattr support is unavailable - confirm at the raise sites.
    """
-												Fix bug in ae1035646a6be09c2aed3e22eb8910f341ddacfe

Closes #4881

											
										
										
											2022-09-09 17:44:20 +00:00
def is_path_like(f):
    """Tell whether `f` is a str, a bytes object or an os.PathLike instance."""
    accepted_types = (str, bytes, os.PathLike)
    return isinstance(f, accepted_types)
-												[ie/youtube] Extract upload timestamp if available (#9856)

Closes #4962, Closes #9829
Authored by: coletdjnz
											
										
										
											2024-05-26 21:13:12 +00:00
def extract_timezone(date_str, default=None):
    """Split a trailing timezone off a date string.

    A numeric "+hh[:]mm" / "-hh[:]mm" suffix or a bare "Z" is recognised first
    and removed from the string. Failing that, a trailing upper-case
    abbreviation following a time is looked up in TIMEZONE_NAMES and only
    removed when it is known there.

    @param date_str  The date string to inspect
    @param default   Offset used when no offset could be determined; a falsy value
                     means a zero offset. Pass NO_DEFAULT to get None back instead
    @returns         (timezone, date_str) - the offset as dt.timedelta (or None)
                     and the date string without the recognised suffix
    """
    numeric = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                   [ ]?                                          # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)

    offset = None
    if numeric:
        date_str = date_str[:-len(numeric.group('tz'))]
        sign_char = numeric.group('sign')
        # A bare "Z" has no sign group, so the offset is left to the default below
        if sign_char:
            factor = -1 if sign_char != '+' else 1
            offset = dt.timedelta(
                hours=factor * int(numeric.group('hours')),
                minutes=factor * int(numeric.group('minutes')))
    else:
        named = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
        abbreviation = named.group('tz').strip() if named else None
        hours = TIMEZONE_NAMES.get(abbreviation)
        if hours is not None:
            # Known abbreviation: cut it off and convert the hour count
            date_str = date_str[:-len(named.group('tz'))]
            offset = dt.timedelta(hours=hours)

    if offset is None and default is not NO_DEFAULT:
        offset = default or dt.timedelta()

    return offset, date_str
-												[camdemy] Simplify and make more robust (#4938)

Do not throw errors if view count or upload date extraction fails.
Dispose of re.MULTILINE, which had absolutely no effect without any ^ or $ in sight.
Follow PEP8 naming conventions.

											
										
										
											2015-02-12 07:55:06 +00:00
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """Return a UNIX timestamp from the given date

    @param date_str   Date in "YYYY-MM-DD<delimiter>HH:MM:SS" form, optionally with
                      fractional seconds and a timezone suffix; None yields None
    @param delimiter  Separator between the date and the time part
    @param timezone   Default offset handed to extract_timezone
    @returns          The timestamp, or None when the string cannot be parsed
    """
    if date_str is None:
        return None

    # Fractional parts (".123") are dropped before parsing
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)
    offset, bare_date = extract_timezone(without_fraction, timezone)

    try:
        parsed = dt.datetime.strptime(bare_date, f'%Y-%m-%d{delimiter}%H:%M:%S')
        return calendar.timegm((parsed - offset).timetuple())
    except (ValueError, TypeError):
        # Unparsable input (or an unusable offset) results in None
        return None
-												[instagram] Fix info_dict key name

											
										
										
											2014-03-24 00:40:09 +00:00
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
def date_formats(day_first=True):
    """Select the list of date formats: day-first when `day_first` is truthy, month-first otherwise."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
-												[cinchcast] Add new extractor (Fixes #4428)

											
										
										
											2014-12-12 01:57:36 +00:00
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    @param date_str   The date string to convert; None yields None
    @param day_first  Passed to date_formats() to choose the format list
    @returns          The date as "YYYYMMDD", or None if no parser recognised it
    """
    if date_str is None:
        return None

    # Commas become spaces; AM/PM (plus a following timezone word) and any
    # recognised timezone suffix are removed
    cleaned = date_str.replace(',', ' ')
    cleaned = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', cleaned)
    cleaned = extract_timezone(cleaned)[1]

    result = None
    # Every format is tried; the loop does not stop at the first success,
    # so the last matching format decides the result
    for fmt in date_formats(day_first):
        try:
            result = dt.datetime.strptime(cleaned, fmt).strftime('%Y%m%d')
        except ValueError:
            continue

    if result is None:
        # Fall back to the parser from email.utils
        parts = email.utils.parsedate_tz(cleaned)
        if parts:
            try:
                result = dt.datetime(*parts[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    return None if result is None else str(result)
-												Fix some IEs that didn't return the uploade_date in the YYYYMMDD format

Create a function unified_strdate in utils.py to fix these problems

											
										
										
											2013-04-27 13:14:20 +00:00
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
def unified_timestamp(date_str, day_first=True):
    """Parse a free-form date/time string into a UNIX timestamp.

    The string is normalised (weekday names, commas, pipes, AM/PM markers and
    timezone suffixes are removed), then matched against the formats returned
    by date_formats(); email.utils.parsedate_tz() is the fallback parser.

    @param date_str   The string to parse; anything that is not a str yields None
    @param day_first  Passed to date_formats() to choose the format list
    @returns          The timestamp (int from the format list, float from the
                      fallback parser), or None if nothing recognised the string
    """
    if not isinstance(date_str, str):
        return None

    # Drop commas, pipes and English weekday names (Sunday included, like the
    # other six days), then collapse runs of whitespace
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?|sun)(day)?', '', date_str))

    # The meridiem has to be detected before the AM/PM marker is stripped below
    is_pm = re.search(r'(?i)PM', date_str) is not None
    is_am = not is_pm and re.search(r'(?i)\d\s*AM\b', date_str) is not None

    def meridiem_shift(hour):
        """Hours to add to a parsed hour so that it follows the 12-hour clock."""
        if is_pm and hour < 12:
            return 12
        # 12:xx AM is 00:xx; 12:xx PM (and already 24h-style hours) stay as parsed
        if is_am and hour == 12:
            return -12
        return 0

    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    # The first format that parses wins
    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            parsed = dt.datetime.strptime(date_str, expression)
            dt_ = parsed - timezone + dt.timedelta(hours=meridiem_shift(parsed.hour))
            return calendar.timegm(dt_.timetuple())

    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        # timetuple[3] is the hour field of the parsed tuple
        return calendar.timegm(timetuple) + meridiem_shift(timetuple[3]) * 3600 - timezone.total_seconds()
    return None
-												[utils] Add unified_timestamp

											
										
										
											2016-06-25 15:30:35 +00:00
-												[utils] Remove stray u'

											
										
										
											2014-11-17 06:16:12 +00:00
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL.

    @param url          The URL (or filename); None or a value without "." yields `default_ext`
    @param default_ext  Returned whenever no usable extension is found
    @returns            The text after the last "." of the part before "?", if it is
                        purely alphanumeric or (minus trailing slashes) a known extension
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.split('?', 1)[0]
    candidate = without_query.rsplit('.', 1)[-1]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    return trimmed if trimmed in KNOWN_EXTENSIONS else default_ext
-												[3sat] Add support (Fixes #1001)

											
										
										
											2013-07-07 23:13:55 +00:00
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[utils] Improve subtitles_filename (closes #22753)

											
										
										
											2019-10-17 21:03:53 +00:00
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle filename by swapping the extension of `filename` for "<sub_lang>.<sub_format>".

    `expected_real_ext` is forwarded unchanged to replace_extension().
    """
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
-												Add a post processor for embedding subtitles in mp4 videos (closes #1052)

											
										
										
											2013-07-20 10:48:57 +00:00
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[utils] Add `datetime_from_str` to parse relative time (#221)
and `datetime_add_months` to accurately add/subtract months

Authored by: colethedj

											
										
										
											2021-04-06 06:45:15 +00:00
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Build a datetime object from a string.
    Accepted input:
        (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    round_to_unit = precision == 'auto'
    if round_to_unit:
        precision = 'microsecond'

    # Reference point: the current UTC time, rounded to the requested precision
    now = datetime_round(dt.datetime.now(dt.timezone.utc), precision)
    if date_str == 'yesterday':
        return now - dt.timedelta(days=1)
    if date_str in ('now', 'today'):
        return now

    relative = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if relative is None:
        # Not a relative expression: parse it as an absolute DATE
        return datetime_round(dt.datetime.strptime(date_str, format), precision)

    # The part before the sign is itself resolved recursively
    base = datetime_from_str(relative.group('start'), precision, format)
    amount = int(relative.group('time'))
    if relative.group('sign') == '-':
        amount = -amount
    unit = relative.group('unit')

    if unit in ('month', 'year'):
        # Months and years go through datetime_add_months; rounding then uses days
        months = amount * 12 if unit == 'year' else amount
        shifted = datetime_add_months(base, months)
        unit = 'day'
    else:
        if unit == 'week':
            unit, amount = 'day', amount * 7
        shifted = base + dt.timedelta(**{f'{unit}s': amount})

    return datetime_round(shifted, unit) if round_to_unit else shifted
-												[utils] Add `datetime_from_str` to parse relative time (#221)
and `datetime_add_months` to accurately add/subtract months

Authored by: colethedj

											
										
										
											2021-04-06 06:45:15 +00:00
-												[utils] Validate `DateRange` input

Closes #2641

											
										
										
											2022-02-11 21:10:49 +00:00
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Build a date object from a string by way of datetime_from_str

    @param format  Forwarded to datetime_from_str as the format of an absolute date
    @param strict  Restrict allowed patterns to "YYYYMMDD" and
                   (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    """
    if strict:
        allowed = re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str)
        if not allowed:
            raise ValueError(f'Invalid date format "{date_str}"')
    parsed = datetime_from_str(date_str, precision='microsecond', format=format)
    return parsed.date()
-												[cleanup] Standardize `import datetime as dt` (#8978)

											
										
										
											2024-02-25 00:16:34 +00:00
def datetime_add_months(dt_, months):
    """
    Increment/Decrement a datetime object by months.

    @param dt_     The date/datetime to shift
    @param months  Number of months to add (negative to subtract)
    @returns       A copy of dt_ in the target month; the day is clamped
                   to the last day of that month when it would overflow
    """
    # Work with a zero-based month index so floor division yields the year carry
    year_carry, month_index = divmod(dt_.month + months - 1, 12)
    target_year = dt_.year + year_carry
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt_.replace(target_year, target_month, min(dt_.day, last_day))
-												[utils] Add `datetime_from_str` to parse relative time (#221)
and `datetime_add_months` to accurately add/subtract months

Authored by: colethedj

											
										
										
											2021-04-06 06:45:15 +00:00
-												[cleanup] Standardize `import datetime as dt` (#8978)

											
										
										
											2024-02-25 00:16:34 +00:00
def datetime_round(dt_, precision='day'):
    """
    Round a datetime object's time to a specific precision

    @param dt_        The datetime to round; its fields are read as UTC
    @param precision  One of microsecond, second, minute, hour or day
    @returns          dt_ itself for 'microsecond'; otherwise a new
                      UTC-aware datetime rounded to the nearest unit
                      (exact halves round up)
    """
    if precision == 'microsecond':
        return dt_

    epoch_seconds = calendar.timegm(dt_.timetuple())
    step = {
        'second': 1,
        'minute': 60,
        'hour': 3600,
        'day': 86400,
    }[precision]
    # Add half a unit, then floor to a multiple of the unit
    rounded = ((epoch_seconds + step / 2) // step) * step
    return dt.datetime.fromtimestamp(rounded, dt.timezone.utc)
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												Added '--xattrs' option which writes metadata to the file's extended attributes using a youtube-dl postprocessor.
Works on Linux, OSX, and Windows.

											
										
										
											2014-01-02 12:47:28 +00:00
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format

    Strings that do not consist of exactly eight digits are returned unchanged.
    """
    parts = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    return date_str if parts is None else '-'.join(parts.groups())
-												PEP8 applied

											
										
										
											2014-11-23 19:41:03 +00:00
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
class DateRange:
    """Represents a time interval between two dates (both ends inclusive)"""

    def __init__(self, start=None, end=None):
        """
        @param start  Lower bound as a string accepted by date_from_str in
                      strict mode; None means the earliest representable date
        @param end    Upper bound, same format; None means the latest
                      representable date
        @raises ValueError  if a bound is malformed or start is after end
        """
        if start is not None:
            self.start = date_from_str(start, strict=True)
        else:
            self.start = dt.datetime.min.date()
        if end is not None:
            self.end = date_from_str(end, strict=True)
        else:
            self.end = dt.datetime.max.date()
        if self.start > self.end:
            raise ValueError(f'Date range: "{self}" , the start date must be before the end date')

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """
        Check if the date is in the range

        @param date  A date, a datetime (only its date part is considered)
                     or a string accepted by date_from_str
        """
        # datetime is a subclass of date but cannot be ordered against plain
        # date objects, so reduce it to its date part before comparing
        if isinstance(date, dt.datetime):
            date = date.date()
        elif not isinstance(date, dt.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'

    def __str__(self):
        return f'{self.start} to {self.end}'

    def __eq__(self, other):
        return (isinstance(other, DateRange)
                and self.start == other.start and self.end == other.end)
-												Fix platform name in Python 2 with --verbose (Closes #1228)

											
										
										
											2013-08-28 10:57:10 +00:00
-												[update] Ability to set a maximum version for specific variants

											
										
										
											2022-06-29 01:13:24 +00:00
@functools.cache
def system_identifier():
    """
    Build a one-line description of the running interpreter and platform

    The result combines the Python version and implementation, machine and
    architecture, platform string, OpenSSL version and (when it can be
    determined) the libc version. It is computed once and then cached.
    """
    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]

    try:
        libc_ver = platform.libc_ver()
    except OSError:  # We may not have access to the executable
        libc_ver = []

    fields = (
        platform.python_version(),
        implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
    )
    return 'Python {} ({} {} {}) - {} ({}{})'.format(*fields)
-												Merge remote-tracking branch 'rzhxeo/youporn-hd'

Conflicts:
	youtube_dl/utils.py

											
										
										
											2013-08-28 16:22:28 +00:00
-												[utils] Improve performance using `functools.cache`

Closes #3786

											
										
										
											2022-05-19 14:06:31 +00:00
@functools.cache
def get_windows_version():
    """ Get Windows version. returns () if it's not running on Windows """
    if compat_os_name != 'nt':
        return ()
    # Second element of win32_ver() is the version string
    return version_tuple(platform.win32_ver()[1])
-												[atv.at] Use jwt for API (#1012)

The jwt token is implemented according to RFC7519

Closes #988
Authored by: NeroBurner
											
										
										
											2021-09-23 17:40:51 +00:00
-												Use --encoding when outputting

											
										
										
											2014-04-07 17:57:42 +00:00
def write_string(s, out=None, encoding=None):
    """
    Write the string s to a stream and flush it

    @param s         The text to write (must be a str)
    @param out       Target stream; falls back to sys.stderr when falsy
    @param encoding  Encoding to prefer when the text has to be encoded

    Binary-mode streams and text streams exposing a `buffer` receive encoded
    bytes (unencodable characters are dropped); any other stream receives
    the str as-is. Nothing is written when no stream is available.
    """
    assert isinstance(s, str)
    stream = out or sys.stderr
    # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
    if not stream:
        return

    if compat_os_name == 'nt' and supports_terminal_sequences(stream):
        # Put a space before each run of line breaks
        s = re.sub(r'([\r\n]+)', r' \1', s)

    target, codec = stream, None
    # `mode` might be `None` (Ref: https://github.com/yt-dlp/yt-dlp/issues/8816)
    stream_mode = getattr(stream, 'mode', None) or ''
    if 'b' in stream_mode:
        codec = encoding or preferredencoding()
    elif hasattr(stream, 'buffer'):
        target = stream.buffer
        codec = encoding or getattr(stream, 'encoding', None) or preferredencoding()

    target.write(s.encode(codec, 'ignore') if codec else s)
    stream.flush()
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								# TODO: Use global logger
-												[utils] Add `deprecation_warning`

See https://github.com/yt-dlp/yt-dlp/pull/2173#issuecomment-1097021515

											
										
										
											2022-08-30 15:28:28 +00:00
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """
    Report the use of a deprecated feature

    @param msg         The deprecation message
    @param printer     Optional callable used to emit the message in CLI mode
    @param stacklevel  Extra stack frames to skip when issuing the warning
                       in non-CLI mode
    @param kwargs      Forwarded to printer / write_string

    In CLI mode every distinct message is emitted only once. Otherwise a
    DeprecationWarning is issued through the warnings module.
    """
    from .. import _IN_CLI

    if not _IN_CLI:
        import warnings
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
        return None

    already_shown = deprecation_warning._cache
    if msg in already_shown:
        return None
    already_shown.add(msg)

    if printer:
        return printer(f'{msg}{bug_reports_message()}', **kwargs)
    return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)


# Messages already reported in CLI mode
deprecation_warning._cache = set()
-												Allow changes to run under Python 3

											
										
										
											2013-08-28 12:28:55 +00:00
def bytes_to_intlist(bs):
    """
    Convert a bytes-like object (or a str) into a list of integers

    Empty or falsy input gives an empty list. Sequences whose items are
    already ints are copied; otherwise each item is passed through ord().
    """
    if not bs:
        return []
    items_are_ints = isinstance(bs[0], int)
    return list(bs) if items_are_ints else [ord(ch) for ch in bs]
-												Merge remote-tracking branch 'rzhxeo/youporn-hd'

Conflicts:
	youtube_dl/utils.py

											
										
										
											2013-08-28 16:22:28 +00:00
-												Add intlist_to_bytes to utils.py
											
										
										
											2013-08-28 13:59:07 +00:00
def intlist_to_bytes(xs):
    """
    Pack a sequence of integers (each 0-255) into a bytes object

    Empty or falsy input gives b''.
    """
    if xs:
        # One unsigned byte ("B") per item
        return struct.pack('%dB' % len(xs), *xs)
    return b''
-												[youtube] Simplify cache_dir code (#1529)

											
										
										
											2013-10-02 06:41:03 +00:00
-												[cleanup] Misc fixes and cleanup

Closes #3780, Closes #3853, Closes #3850

											
										
										
											2022-05-26 23:06:23 +00:00
class LockingUnsupportedError(OSError):
    """Error carrying a fixed "locking is not supported" message"""

    # Used as the sole argument of the exception
    msg = 'File locking is not supported'

    def __init__(self):
        message = self.msg
        super().__init__(message)
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
+								# Cross-platform file locking
 								if sys.platform == 'win32':
-												Import ctypes only when necessary

Closes #4541

											
										
										
											2022-08-03 12:17:38 +00:00
+								    import ctypes
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
+								    import ctypes.wintypes
 								    import msvcrt
    class OVERLAPPED(ctypes.Structure):
        # ctypes structure whose pointer is passed as the final ("Overlapped")
        # argument of LockFileEx / UnlockFileEx.
        # NOTE(review): field names and order presumably mirror the Win32
        # OVERLAPPED struct - confirm against the Windows SDK definition
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]
-												[utils] Use local kernel32 for file locking on Windows

Ref: https://github.com/ytdl-org/youtube-dl/issues/21545

Authored by: Grub4K

											
										
										
											2023-01-25 21:32:07 +00:00
+								    kernel32 = ctypes.WinDLL('kernel32')
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
+								    LockFileEx = kernel32.LockFileEx
 								    LockFileEx.argtypes = [
 								        ctypes.wintypes.HANDLE,     # hFile
 								        ctypes.wintypes.DWORD,      # dwFlags
 								        ctypes.wintypes.DWORD,      # dwReserved
 								        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
 								        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        ctypes.POINTER(OVERLAPPED),  # Overlapped
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
+								    ]
 								    LockFileEx.restype = ctypes.wintypes.BOOL
 								    UnlockFileEx = kernel32.UnlockFileEx
 								    UnlockFileEx.argtypes = [
 								        ctypes.wintypes.HANDLE,     # hFile
 								        ctypes.wintypes.DWORD,      # dwReserved
 								        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
 								        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        ctypes.POINTER(OVERLAPPED),  # Overlapped
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
+								    ]
 								    UnlockFileEx.restype = ctypes.wintypes.BOOL
 								    whole_low = 0xffffffff
 								    whole_high = 0x7fffffff
-												[utils] Improve file locking

* Implement non-blocking locks for windows
* Don't raise error when closing a closed file

											
										
										
											2022-03-03 13:27:38 +00:00
+								    def _lock_file(f, exclusive, block):
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
+								        overlapped = OVERLAPPED()
 								        overlapped.Offset = 0
 								        overlapped.OffsetHigh = 0
 								        overlapped.hEvent = 0
 								        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
-												[utils] Improve file locking

* Implement non-blocking locks for windows
* Don't raise error when closing a closed file

											
										
										
											2022-03-03 13:27:38 +00:00
 								        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
 								                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
 , whole_low, whole_high, f._lock_file_overlapped_p):
-												[utils] `locked_file`: Fix for PyPy on Windows

											
										
										
											2022-06-13 11:57:31 +00:00
+								            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
 								            raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
 								    def _unlock_file(f):
 								        assert f._lock_file_overlapped_p
 								        handle = msvcrt.get_osfhandle(f.fileno())
-												[utils] Improve file locking

* Implement non-blocking locks for windows
* Don't raise error when closing a closed file

											
										
										
											2022-03-03 13:27:38 +00:00
+								        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            raise OSError(f'Unlocking file failed: {ctypes.FormatError()!r}')
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
 								else:
-												[utils] Jython support: tolerate missing fcntl module

											
										
										
											2016-02-20 19:28:25 +00:00
+								    try:
 								        import fcntl
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
-												[utils] Use `locked_file` for `sanitize_open` (#1066)

Authored by: jakeogh
											
										
										
											2022-02-05 10:45:51 +00:00
+								        def _lock_file(f, exclusive, block):
-												[utils] locked_file: Fix non-blocking non-exclusive lock

											
										
										
											2022-04-07 06:30:58 +00:00
+								            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
 								            if not block:
 								                flags |= fcntl.LOCK_NB
-												[utils] Fix file locking for AOSP (#2714)

Closes #2080, #2670

Authored by: jakeogh
											
										
										
											2022-03-03 13:09:00 +00:00
+								            try:
-												[utils] locked_file: Fix non-blocking non-exclusive lock

											
										
										
											2022-04-07 06:30:58 +00:00
+								                fcntl.flock(f, flags)
-												[utils] Fix file locking for AOSP (#2714)

Closes #2080, #2670

Authored by: jakeogh
											
										
										
											2022-03-03 13:09:00 +00:00
+								            except BlockingIOError:
 								                raise
 								            except OSError:  # AOSP does not have flock()
-												[utils] locked_file: Fix non-blocking non-exclusive lock

											
										
										
											2022-04-07 06:30:58 +00:00
+								                fcntl.lockf(f, flags)
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
-												[utils] Jython support: tolerate missing fcntl module

											
										
										
											2016-02-20 19:28:25 +00:00
+								        def _unlock_file(f):
-												[utils] `locked_file`: Fix for virtiofs (#6840)

Authored by: brandon-dacrib
Closes #6823
											
										
										
											2023-05-05 05:31:41 +00:00
+								            with contextlib.suppress(OSError):
 								                return fcntl.flock(f, fcntl.LOCK_UN)
 								            with contextlib.suppress(OSError):
 								                return fcntl.lockf(f, fcntl.LOCK_UN)  # AOSP does not have flock()
 								            return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB)  # virtiofs needs LOCK_NB on unlocking
-												[utils] Use `locked_file` for `sanitize_open` (#1066)

Authored by: jakeogh
											
										
										
											2022-02-05 10:45:51 +00:00
-												[utils] Jython support: tolerate missing fcntl module

											
										
										
											2016-02-20 19:28:25 +00:00
+								    except ImportError:
-												[utils] Use `locked_file` for `sanitize_open` (#1066)

Authored by: jakeogh
											
										
										
											2022-02-05 10:45:51 +00:00
+								        def _lock_file(f, exclusive, block):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            raise LockingUnsupportedError
-												[utils] Jython support: tolerate missing fcntl module

											
										
										
											2016-02-20 19:28:25 +00:00
 								        def _unlock_file(f):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            raise LockingUnsupportedError
-												Add basic --download-archive option

Often, users want to be able to download only videos they haven't seen before, despite the video files having been deleted or moved in the mean time.
When --download-archive FILE is given, the extractor and ID of every download is recorded in the specified file. If it is already present, the video in question is skipped.

											
										
										
											2013-10-06 02:27:09 +00:00
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
class locked_file:
    """File wrapper that holds an advisory lock on the file while it is in use.

    The file is opened in `__init__` without truncating it. `__enter__` (alias
    `open`) acquires the lock and, for 'w' modes, truncates the file only after
    the lock is held. `__exit__` (alias `close`) releases the lock and closes
    the file. Modes containing 'r' take a shared lock, all others an exclusive
    one. Attributes not defined here, and iteration, are forwarded to the
    underlying file object `self.f`.

    @param filename  Path of the file to open
    @param mode      One of 'r', 'rb', 'a', 'ab', 'w', 'wb';
                     anything else raises NotImplementedError
    @param block     Passed to `_lock_file`; whether to wait for the lock
    @param encoding  Passed to `os.fdopen`
    """
    locked = False

    def __init__(self, filename, mode, block=True, encoding=None):
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        flags = functools.reduce(operator.ior, (
            getattr(os, 'O_CLOEXEC', 0),  # UNIX only
            getattr(os, 'O_BINARY', 0),  # Windows only
            getattr(os, 'O_NOINHERIT', 0),  # Windows only
            os.O_CREAT if writable else 0,  # O_TRUNC only after locking
            os.O_APPEND if 'a' in mode else 0,
            os.O_EXCL if 'x' in mode else 0,
            os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
        ))

        fd = os.open(filename, flags, 0o666)
        try:
            self.f = os.fdopen(fd, mode, encoding=encoding)
        except BaseException:
            # fdopen can fail before it takes ownership of the descriptor
            # (e.g. binary mode together with an encoding); do not leak it
            with contextlib.suppress(OSError):
                os.close(fd)
            raise

    def __enter__(self):
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            self.f.close()
            raise
        if 'w' in self.mode:
            try:
                self.f.truncate()
            except OSError as e:
                if e.errno not in (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                ):
                    # __exit__ is not invoked when __enter__ raises, so release
                    # the lock and close the file here before propagating
                    with contextlib.suppress(OSError):
                        self.__exit__()
                    raise
        return self

    def unlock(self):
        """Release the lock if it is held; the flag is reset even if unlocking fails"""
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        try:
            self.unlock()
        finally:
            self.f.close()

    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        if attr == 'f':
            # `f` is missing when opening failed; looking it up through
            # `self.f` below would recurse into this method forever
            raise AttributeError(attr)
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)
-												[utils] Use `locked_file` for `sanitize_open` (#1066)

Authored by: jakeogh
											
										
										
											2022-02-05 10:45:51 +00:00
-												FFmpegPostProcessor: print the command line used if the --verbose option is given

											
										
										
											2013-10-12 11:49:27 +00:00
-												[utils] Improve performance using `functools.cache`

Closes #3786

											
										
										
											2022-05-19 14:06:31 +00:00
@functools.cache
def get_filesystem_encoding():
    """Return the filesystem encoding reported by Python, or 'utf-8' if it reports None"""
    fs_encoding = sys.getfilesystemencoding()
    if fs_encoding is None:
        return 'utf-8'
    return fs_encoding
-												[core/windows] Improve shell quoting and tests (#9802)

Authored by: Grub4K
											
										
										
											2024-04-27 08:37:26 +00:00
# Translation table applied by `shell_quote` on Windows when `shell` is false:
# every double quote becomes a backslash-escaped quote
_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})

# Translation table applied by `shell_quote` on Windows when `shell` is true
_CMD_QUOTE_TRANS = str.maketrans({
    # Keep quotes balanced by replacing them with `""` instead of `\\"`
    '"': '""',
    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
    # `=` should be unique since variables containing `=` cannot be set using cmd
    '\n': '%=%',
    '\r': '%=%',
    # Use zero length variable replacement so `%` doesn't get expanded
    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
    '%': '%%cd:~,%',
})
def shell_quote(args, *, shell=False):
    """Join one or more arguments into a single quoted command-line string.

    @param args   Argument or collection of arguments (normalised with `variadic`)
    @param shell  Only consulted on Windows: selects `_CMD_QUOTE_TRANS`
                  instead of `_WINDOWS_QUOTE_TRANS` for escaping
    @returns      On non-Windows, the result of `shlex.join`; on Windows,
                  the individually quoted arguments joined by spaces
    """
    args = list(variadic(args))
    if compat_os_name != 'nt':
        return shlex.join(args)
    trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
    return ' '.join(
        # Arguments made up solely of these ASCII characters are passed through unquoted
        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
        # Otherwise: double any run of backslashes that precedes a quote or the end,
        # apply the translation table, then wrap the result in double quotes
        else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
        for s in args)
-												[generic] Support embedded vimeo videos (#1602)

											
										
										
											2013-10-15 10:05:13 +00:00
 								def smuggle_url(url, data):
 								    """ Pass additional data in a URL for internal use. """
-												[kaltura] add support videos stored on custom kaltura servers(closes #5557)

											
										
										
											2016-07-04 16:57:44 +00:00
+								    url, idata = unsmuggle_url(url, {})
 								    data.update(idata)
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    sdata = urllib.parse.urlencode(
-												[utils] Remove stray u'

											
										
										
											2014-11-17 06:16:12 +00:00
+								        {'__youtubedl_smuggle': json.dumps(data)})
 								    return url + '#' + sdata
-												[generic] Support embedded vimeo videos (#1602)

											
										
										
											2013-10-15 10:05:13 +00:00
-												Use original Referer URL in Brightcove requests (Fixes #2110)

											
										
										
											2014-01-07 04:34:14 +00:00
def unsmuggle_url(smug_url, default=None):
    """Split a URL into the plain URL and the data smuggled in its fragment.

    Returns `(smug_url, default)` unchanged when the URL carries no
    `#__youtubedl_smuggle` marker; otherwise `(url, data)` where `data` is the
    decoded JSON payload and `url` is everything before the last `#`.
    """
    if '#__youtubedl_smuggle' in smug_url:
        plain_url, _, fragment = smug_url.rpartition('#')
        payload = urllib.parse.parse_qs(fragment)['__youtubedl_smuggle'][0]
        return plain_url, json.loads(payload)
    return smug_url, default
-												[zdf/common] Use API in ZDF extractor.

This also comes with a lot of extra format fields
Fixes #1518

											
										
										
											2013-11-25 02:12:26 +00:00
-												[outtmpl] Add alternate forms `F`, `D`
and improve `id` detection

F = sanitize as filename (# = restricted)
D = add Decimal suffixes

Closes #2085, 2081

											
										
										
											2021-12-23 01:14:42 +00:00
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """Format a number with a decimal suffix like k, M, G, etc.

    @param num     Number to format; it is first passed through `float_or_none`
    @param fmt     %-style format string receiving (scaled number, suffix)
    @param factor  Ratio between consecutive suffixes; with 1024 the binary
                   suffixes Ki, Mi, ... are used instead
    @returns       The formatted string, or None if `float_or_none(num)` is None
                   or the number is negative
    """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    # Numbers below 1 have a negative logarithm; clamp at 0 so that they are shown
    # unscaled instead of indexing the suffix list from the end (e.g. 0.0005 -> '0Y')
    exponent = 0 if num == 0 else max(0, min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)
-												[outtmpl] Add alternate forms `F`, `D`
and improve `id` detection

F = sanitize as filename (# = restricted)
D = add Decimal suffixes

Closes #2085, 2081

											
										
										
											2021-12-23 01:14:42 +00:00
-												[zdf/common] Use API in ZDF extractor.

This also comes with a lot of extra format fields
Fixes #1518

											
										
										
											2013-11-25 02:12:26 +00:00
def format_bytes(bytes):
    """Format a byte count with two decimals and a binary suffix (factor 1024), or 'N/A'

    'N/A' is returned whenever `format_decimal_suffix` yields a falsy result.
    """
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    if formatted:
        return formatted
    return 'N/A'
-												[dailymotion] Extract view count (#1895)

											
										
										
											2013-12-06 12:36:36 +00:00
-												Improve --bidi-workaround support

											
										
										
											2013-12-09 17:29:07 +00:00
-												[utils] Move `FileDownloader.parse_bytes` into utils

											
										
										
											2022-11-17 03:10:34 +00:00
def lookup_unit_table(unit_table, s, strict=False):
    """Parse a "<number><unit>" string and scale the number by the unit's multiplier.

    @param unit_table  Mapping of unit name to multiplier
    @param s           String to parse
    @param strict      If true, the whole string must match and only `NUMBER_RE`
                       numbers are accepted; otherwise matching is anchored at the
                       start only and `,` is also accepted where `NUMBER_RE` has `\\.`
    @returns           The scaled value rounded with `round`, or None if nothing matches
    """
    if strict:
        number_pattern, matcher = NUMBER_RE, re.fullmatch
    else:
        number_pattern, matcher = NUMBER_RE.replace(R'\.', '[,.]'), re.match
    unit_pattern = '|'.join(map(re.escape, unit_table))

    found = matcher(rf'(?P<num>{number_pattern})\s*(?P<unit>{unit_pattern})\b', s)
    if found is None:
        return None

    # A decimal comma is normalised to a dot before conversion
    value = float(found.group('num').replace(',', '.'))
    multiplier = unit_table[found.group('unit')]
    return round(value * multiplier)
 								def parse_bytes(s):
 								    """Parse a string indicating a byte quantity into an integer"""
 								    return lookup_unit_table(
 								        {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])},
 								        s.upper(), strict=True)
-												[bbc] Generalize unit table lookup and add parse_count

											
										
										
											2016-03-13 10:27:20 +00:00
-												[xminus] Simplify and extend (#4302)

											
										
										
											2014-11-25 08:54:54 +00:00
def parse_filesize(s):
    """Parse a human-readable file size (e.g. '5 GB', '1.2MiB') using the unit table below.

    Returns None if `s` is None; otherwise whatever `lookup_unit_table` returns
    for `s` with this table.
    """
    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    unit_multipliers = {
        'B': 1, 'b': 1, 'bytes': 1,

        'KiB': 1024, 'KB': 1000, 'kB': 1024, 'Kb': 1000, 'kb': 1000,
        'kilobytes': 1000, 'kibibytes': 1024,

        'MiB': 1024 ** 2, 'MB': 1000 ** 2, 'mB': 1024 ** 2, 'Mb': 1000 ** 2, 'mb': 1000 ** 2,
        'megabytes': 1000 ** 2, 'mebibytes': 1024 ** 2,

        'GiB': 1024 ** 3, 'GB': 1000 ** 3, 'gB': 1024 ** 3, 'Gb': 1000 ** 3, 'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3, 'gibibytes': 1024 ** 3,

        'TiB': 1024 ** 4, 'TB': 1000 ** 4, 'tB': 1024 ** 4, 'Tb': 1000 ** 4, 'tb': 1000 ** 4,
        'terabytes': 1000 ** 4, 'tebibytes': 1024 ** 4,

        'PiB': 1024 ** 5, 'PB': 1000 ** 5, 'pB': 1024 ** 5, 'Pb': 1000 ** 5, 'pb': 1000 ** 5,
        'petabytes': 1000 ** 5, 'pebibytes': 1024 ** 5,

        'EiB': 1024 ** 6, 'EB': 1000 ** 6, 'eB': 1024 ** 6, 'Eb': 1000 ** 6, 'eb': 1000 ** 6,
        'exabytes': 1000 ** 6, 'exbibytes': 1024 ** 6,

        'ZiB': 1024 ** 7, 'ZB': 1000 ** 7, 'zB': 1024 ** 7, 'Zb': 1000 ** 7, 'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7, 'zebibytes': 1024 ** 7,

        'YiB': 1024 ** 8, 'YB': 1000 ** 8, 'yB': 1024 ** 8, 'Yb': 1000 ** 8, 'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8, 'yobibytes': 1024 ** 8,
    }

    return None if s is None else lookup_unit_table(unit_multipliers, s)
 								def parse_count(s):
 								    if s is None:
-												[xminus] Simplify and extend (#4302)

											
										
										
											2014-11-25 08:54:54 +00:00
+								        return None
-												[utils] Improve `parse_count`

											
										
										
											2021-12-23 21:32:50 +00:00
+								    s = re.sub(r'^[^\d]+\s', '', s).strip()
-												[bbc] Generalize unit table lookup and add parse_count

											
										
										
											2016-03-13 10:27:20 +00:00
 								    if re.match(r'^[\d,.]+$', s):
 								        return str_to_int(s)
 								    _UNIT_TABLE = {
 								        'k': 1000,
 								        'K': 1000,
 								        'm': 1000 ** 2,
 								        'M': 1000 ** 2,
 								        'kk': 1000 ** 2,
 								        'KK': 1000 ** 2,
-												[utils] Improve `parse_count`

											
										
										
											2021-12-23 21:32:50 +00:00
+								        'b': 1000 ** 3,
 								        'B': 1000 ** 3,
-												[bbc] Generalize unit table lookup and add parse_count

											
										
										
											2016-03-13 10:27:20 +00:00
+								    }
-												[xminus] Simplify and extend (#4302)

											
										
										
											2014-11-25 08:54:54 +00:00
-												[utils] Improve `parse_count`

											
										
										
											2021-12-23 21:32:50 +00:00
+								    ret = lookup_unit_table(_UNIT_TABLE, s)
 								    if ret is not None:
 								        return ret
 								    mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
 								    if mobj:
 								        return str_to_int(mobj.group(1))
-												[xminus] Simplify and extend (#4302)

											
										
										
											2014-11-25 08:54:54 +00:00
-												[utils] PEP 8

											
										
										
											2016-03-13 11:23:08 +00:00
-												[niconico] Fix extraction of thumbnails and uploader (#3266)


											
										
										
											2022-04-01 10:31:58 +00:00
def parse_resolution(s, *, lenient=False):
    """Extract video dimensions from a string.

    Recognized forms, in order of precedence:
      * "<w>x<h>" (separator may be x, X, × or a comma) -> width and height
      * "<n>p" / "<n>i" (any case)                      -> height only
      * "4k" / "8k" (any case)                          -> height = n * 540

    With lenient=True the "<w>x<h>" form may be directly adjacent to
    letters or digits; by default it may not.

    Returns a dict with 'width' and/or 'height', or {} if s is None or
    nothing matches.
    """
    if s is None:
        return {}

    dimensions_re = (
        r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)' if lenient
        else r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])')
    dimensions = re.search(dimensions_re, s)
    if dimensions:
        return {
            'width': int(dimensions.group('w')),
            'height': int(dimensions.group('h')),
        }

    scan_lines = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if scan_lines:
        return {'height': int(scan_lines.group(1))}

    shorthand = re.search(r'\b([48])[kK]\b', s)
    return {'height': int(shorthand.group(1)) * 540} if shorthand else {}
-												[utils] Introduce parse_bitrate

											
										
										
											2019-03-17 02:07:47 +00:00
def parse_bitrate(s):
    """Return the integer that precedes 'kbps' in s.

    Returns None when s is not a str or contains no such number.
    """
    if isinstance(s, str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found is not None:
            return int(found.group(1))
    return None
-												[utils,franceinter] Add french months' names and fix extraction

Update of the "FranceInter" radio extractor : webpages HTML structure
had changed, the extractor didn't work. So I updated this extractor to
get the mp3 URL and all details.

											
										
										
											2016-09-02 16:31:52 +00:00
def month_by_name(name, lang='en'):
    """ Return the 1-based number of the month called `name` in language `lang`

    Languages missing from MONTH_NAMES fall back to the English names.
    Matching is exact; None is returned for an unknown name.
    """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name in names:
        return names.index(name) + 1
    return None
 								def month_by_abbreviation(abbrev):
 								    """ Return the number of a month by (locale-independently) English
 								        abbreviations """
 								    try:
 								        return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
-												[ndtv] Add extractor (Fixes #1924)

											
										
										
											2013-12-09 18:39:41 +00:00
+								    except ValueError:
 								        return None
-												Use `_download_xml` in more extractors

											
										
										
											2013-12-10 20:03:53 +00:00
-												Correct XML ampersand fixup

											
										
										
											2014-01-20 21:11:34 +00:00
def fix_xml_ampersands(xml_str):
    """Replace every bare '&' in XML by '&amp;'

    An ampersand is left untouched when it already starts one of the five
    predefined entities (amp, lt, gt, apos, quot) or a numeric character
    reference (&#123; / &#x1F600;).
    """
    # Numeric references need at least one digit and may have any number of
    # them: code points above U+FFFF take more than four.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]+;|#[0-9]+;)',
        '&amp;',
        xml_str)
-												Set process title to youtube-dl

This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.

											
										
										
											2013-12-16 04:04:12 +00:00
 								def setproctitle(title):
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    assert isinstance(title, str)
-												[utils] Jython support - disable setproctitle() until ctypes is complete

											
										
										
											2016-02-20 19:29:02 +00:00
-												Import ctypes only when necessary

Closes #4541

											
										
										
											2022-08-03 12:17:38 +00:00
+								    # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
 								    try:
 								        import ctypes
 								    except ImportError:
-												[utils] Jython support - disable setproctitle() until ctypes is complete

											
										
										
											2016-02-20 19:29:02 +00:00
+								        return
-												Set process title to youtube-dl

This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.

											
										
										
											2013-12-16 04:04:12 +00:00
+								    try:
-												[refactor] Single quotes consistency

											
										
										
											2016-02-14 09:37:17 +00:00
+								        libc = ctypes.cdll.LoadLibrary('libc.so.6')
-												Set process title to youtube-dl

This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.

											
										
										
											2013-12-16 04:04:12 +00:00
+								    except OSError:
 								        return
-												utils.py: Workaround TypeError with Python 2.7.13 in Windows

Fixes #11540

Tested with Windows Python 2.7.12 and 2.7.13.

											
										
										
											2017-02-10 21:05:09 +00:00
+								    except TypeError:
 								        # LoadLibrary in Windows Python 2.7.13 only expects
 								        # a bytestring, but since unicode_literals turns
 								        # every string into a unicode string, it fails.
 								        return
-												[cleanup] Minor fixes (See desc)

* [youtube] Fix `--youtube-skip-dash-manifest`
* [build] Use `$()` in `Makefile`. Closes #3684
* Fix bug in 385ffb467b2285e85a2a5495b90314ba1f8e0700
* Fix bug in 43d7f5a5d0c77556156a3f8caa6976d3908a1e38
* [cleanup] Remove unnecessary `utf-8` from `str.encode`/`bytes.decode`
* [utils] LazyList: Expose unnecessarily "protected" attributes
and other minor cleanup

											
										
										
											2022-05-09 11:54:28 +00:00
+								    title_bytes = title.encode()
-												[utils] Simplify setproctitle

											
										
										
											2014-03-23 13:28:22 +00:00
+								    buf = ctypes.create_string_buffer(len(title_bytes))
 								    buf.value = title_bytes
-												Set process title to youtube-dl

This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.

											
										
										
											2013-12-16 04:04:12 +00:00
+								    try:
-												[cleanup] Misc (#8598)

Authored by: bashonly, pukkandan, seproDev, Grub4K

Co-authored-by: bashonly <bashonly@protonmail.com>
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
											
										
										
											2023-12-30 21:27:36 +00:00
+								        # PR_SET_NAME = 15      Ref: /usr/include/linux/prctl.h
-												[utils] Simplify setproctitle

											
										
										
											2014-03-23 13:28:22 +00:00
+								        libc.prctl(15, buf, 0, 0, 0)
-												Set process title to youtube-dl

This allows killing all youtube-dl processes with killall youtube-dl, and shows up nicer in some programs.

											
										
										
											2013-12-16 04:04:12 +00:00
+								    except AttributeError:
 								        return  # Strange libc, just skip this
-												[blinkx] Add extractor (Fixes #1972)

											
										
										
											2013-12-16 12:56:13 +00:00
 								def remove_start(s, start):
-												[utils] Allow None in remove_{start,end}

											
										
										
											2016-05-18 22:31:30 +00:00
+								    return s[len(start):] if s is not None and s.startswith(start) else s
-												Add webpage_url_basename info_dict field (Fixes #1938)

											
										
										
											2013-12-17 03:13:36 +00:00
-												[rtve] Add support for live stream

At the moment, only RTVE-1 seems to work flawlessly.
-2 seems geoblocked right now.
-TDP doesn't seem to be available outside of Spain.

											
										
										
											2014-08-22 16:40:26 +00:00
def remove_end(s, end):
    """Return s without the suffix `end`; s is returned unchanged when it
    is None, when `end` is empty, or when s does not finish with `end`"""
    # An empty suffix must be special-cased: every string "ends with" '',
    # and s[:-0] is s[:0], which would wipe the whole string.
    if s is None or not end or not s.endswith(end):
        return s
    return s[:-len(end)]
-												[rtve] Add support for live stream

At the moment, only RTVE-1 seems to work flawlessly.
-2 seems geoblocked right now.
-TDP doesn't seem to be available outside of Spain.

											
										
										
											2014-08-22 16:40:26 +00:00
-												[utils] Add remove_quotes

											
										
										
											2015-12-14 15:30:58 +00:00
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding s

    Values that are None, shorter than two items, or not wrapped in a
    matching pair are returned unchanged.
    """
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
-												[mtv] fix mtv.com and more(?)


											
										
										
											2020-10-09 05:06:49 +00:00
def get_domain(url):
    """
    Return the network location of url without a leading 'www.',
    or None if that leaves nothing.

    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"
    """
    netloc = urllib.parse.urlparse(url).netloc
    domain = remove_start(netloc, 'www.')
    return domain or None
-												[mtv] fix mtv.com and more(?)


											
										
										
											2020-10-09 05:06:49 +00:00
-												Add webpage_url_basename info_dict field (Fixes #1938)

											
										
										
											2013-12-17 03:13:36 +00:00
def url_basename(url):
    """Return the last segment of the URL's path, ignoring leading and
    trailing slashes; '' if the path has no segments"""
    trimmed_path = urllib.parse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
-												[aparat] Add support (Fixes #2012)

											
										
										
											2013-12-20 16:05:28 +00:00
-												[utils] Introduce base_url

											
										
										
											2016-11-01 19:14:01 +00:00
def base_url(url):
    """Return url truncated after the last '/' that precedes any '?' or '#'

    Raises AttributeError if url does not start with an http(s) URL that
    contains such a slash after the '//'.
    """
    prefix = re.match(r'https?://[^?#]+/', url)
    return prefix.group()
-												[utils] Introduce base_url

											
										
										
											2016-11-01 19:14:01 +00:00
-												[utils] Add convenience urljoin

											
										
										
											2016-12-12 19:23:49 +00:00
def urljoin(base, path):
    """Join `path` onto `base`, tolerating bad input

    bytes arguments are decoded first. Returns:
      * None if path is not a non-empty string
      * path unchanged if it is already absolute ("scheme://...") or
        protocol-relative ("//...")
      * None if base is not an http(s) or protocol-relative URL string
      * urllib.parse.urljoin(base, path) otherwise
    """
    if isinstance(path, bytes):
        path = path.decode()
    if not isinstance(path, str) or not path:
        return None
    # '-' has to come last in the character class: written as '+-.' it is a
    # range that also lets ',' through as a scheme character.
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+.-]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)
-												[utils] Add convenience urljoin

											
										
										
											2016-12-12 19:23:49 +00:00
-												[snotr] PEP8 and minor fixes (#3296)

											
										
										
											2014-07-21 10:02:44 +00:00
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int, scaled as `int(v) * invscale // scale`

    If get_attr is given and v is not None, the attribute of that name is
    read from v first (a missing attribute counts as None). `default` is
    returned when the conversion raises ValueError, TypeError or
    OverflowError.
    """
    value = v
    if get_attr and v is not None:
        value = getattr(v, get_attr, None)
    try:
        upscaled = int(value) * invscale
        return upscaled // scale
    except (ValueError, TypeError, OverflowError):
        return default
-												[snotr] PEP8 and minor fixes (#3296)

											
										
										
											2014-07-21 10:02:44 +00:00
-												[appletrailers] Support height-less videos

											
										
										
											2014-08-10 11:04:45 +00:00
-												[reverbnation] The 'uploader_id' field must be a string

											
										
										
											2014-08-10 09:00:14 +00:00
def str_or_none(v, default=None):
    """Return str(v), or `default` when v is None"""
    if v is None:
        return default
    return str(v)
-												[reverbnation] The 'uploader_id' field must be a string

											
										
										
											2014-08-10 09:00:14 +00:00
-												[snotr] PEP8 and minor fixes (#3296)

											
										
										
											2014-07-21 10:02:44 +00:00
 								def str_to_int(int_str):
-												[eporner] Simplify and correct (#3629)

											
										
										
											2014-08-31 21:51:36 +00:00
+								    """ A more relaxed version of int_or_none """
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								    if isinstance(int_str, int):
-												[utils] handle int values passed to str_to_int

											
										
										
											2019-11-29 16:05:06 +00:00
+								        return int_str
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    elif isinstance(int_str, str):
-												[utils] Improve str_to_int

											
										
										
											2019-12-15 16:15:24 +00:00
+								        int_str = re.sub(r'[,\.\+]', '', int_str)
 								        return int_or_none(int_str)
-												[cnn] Add multiple formats, duration, and upload_date

											
										
										
											2013-12-26 12:49:44 +00:00
-												[snotr] PEP8 and minor fixes (#3296)

											
										
										
											2014-07-21 10:02:44 +00:00
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to float, scaled as `float(v) * invscale / scale`

    `default` is returned when v is None or when the conversion raises
    ValueError, TypeError or OverflowError.
    """
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    # OverflowError: ints too large for a float, matching int_or_none
    except (ValueError, TypeError, OverflowError):
        return default
-												[comedycentral] Duration can now be a float (Fixes #2647)

											
										
										
											2014-03-28 22:06:34 +00:00
-												[utils] Introduce bool_or_none

											
										
										
											2017-09-10 12:08:39 +00:00
def bool_or_none(v, default=None):
    """Return v if it is an actual bool, otherwise `default`"""
    if isinstance(v, bool):
        return v
    return default
-												[utils] Improve strip_or_none

											
										
										
											2019-05-23 16:58:35 +00:00
def strip_or_none(v, default=None):
    """Return v stripped of surrounding whitespace if it is a str,
    otherwise `default`"""
    if isinstance(v, str):
        return v.strip()
    return default
-												[utils] Add strip_or_none

											
										
										
											2016-06-25 15:32:02 +00:00
-												[utils] Introduce url_or_none

											
										
										
											2018-07-21 11:01:06 +00:00
def url_or_none(url):
    """Return url, stripped of surrounding whitespace, if it looks like a URL

    Accepted are protocol-relative URLs ("//...") and URLs whose scheme is
    one of the http(s), rtmp*/rtmfp, rtsp*, mms or ftp(s) variants matched
    below. Anything else, including non-strings and empty values, gives
    None.
    """
    if not (url and isinstance(url, str)):
        return None
    candidate = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', candidate):
        return candidate
    return None
-												[utils] Introduce url_or_none

											
										
										
											2018-07-21 11:01:06 +00:00
-												[cleanup] Misc

Closes #6288, Closes #7197, Closes #7265, Closes #7353, Closes #5773
Authored by: mikf, freezboltz, pukkandan

											
										
										
											2023-06-21 03:51:20 +00:00
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
    """Format a timestamp with `date_format`, or return `default` on failure

    @param timestamp    int/float unix timestamp (interpreted as UTC),
                        or a str in YYYYMMDD form
    @param date_format  strftime format; "%s" is expanded to the integer
                        unix timestamp on every platform
    @param default      returned when timestamp has another type, cannot
                        be parsed, or lies outside the supported date range
    """
    datetime_object = None
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Using naive datetime here can break timestamp() in Windows
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
            # Also, dt.datetime.fromtimestamp breaks for negative timestamps
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
            datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc)
                               + dt.timedelta(seconds=timestamp))
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d')
        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
        return datetime_object.strftime(date_format)
    # OverflowError: timestamp beyond the datetime range (e.g. milliseconds
    # passed as seconds, or infinity)
    # OSError: naive .timestamp() in the str branch can fail on Windows
    # (see the cpython issue referenced above)
    except (ValueError, TypeError, AttributeError, OverflowError, OSError):
        return default
-												[cnn] Add multiple formats, duration, and upload_date

											
										
										
											2013-12-26 12:49:44 +00:00
+								def parse_duration(s):
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								    if not isinstance(s, str):
-												[cnn] Add multiple formats, duration, and upload_date

											
										
										
											2013-12-26 12:49:44 +00:00
+								        return None
-												[utils] Improve parse_duration

											
										
										
											2014-08-30 23:41:30 +00:00
+								    s = s.strip()
-												Use `parse_duration` for `--wait-for-video`
and some minor fix

											
										
										
											2021-12-06 18:00:33 +00:00
+								    if not s:
 								        return None
-												[utils] Improve parse_duration

											
										
										
											2014-08-30 23:41:30 +00:00
-												[utils] imporove parse_duration to handle more formats

											
										
										
											2016-04-07 18:30:47 +00:00
+								    days, hours, mins, secs, ms = [None] * 5
-												[utils] Handle `ss:xxx` in `parse_duration`

Closes #2388

											
										
										
											2022-01-19 12:41:27 +00:00
+								    m = re.match(r'''(?x)
 								            (?P<before_secs>
 								                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
 								            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
 								            (?P<ms>[.:][0-9]+)?Z?$
 								        ''', s)
-												[utils] imporove parse_duration to handle more formats

											
										
										
											2016-04-07 18:30:47 +00:00
+								    if m:
-												[utils] Handle `ss:xxx` in `parse_duration`

Closes #2388

											
										
										
											2022-01-19 12:41:27 +00:00
+								        days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
-												[utils] imporove parse_duration to handle more formats

											
										
										
											2016-04-07 18:30:47 +00:00
+								    else:
 								        m = re.match(
-												[utils] Add support for zero years and months in parse_duration

											
										
										
											2017-10-29 00:04:48 +00:00
+								            r'''(?ix)(?:P?
 								                (?:
-												[youtube:tab] Fix duration extraction for shorts (#3171)

Related: https://github.com/TeamNewPipe/NewPipe/issues/8034
Authored-by: coletdjnz
											
										
										
											2022-03-28 00:49:42 +00:00
+								                    [0-9]+\s*y(?:ears?)?,?\s*
-												[utils] Add support for zero years and months in parse_duration

											
										
										
											2017-10-29 00:04:48 +00:00
+								                )?
 								                (?:
-												[youtube:tab] Fix duration extraction for shorts (#3171)

Related: https://github.com/TeamNewPipe/NewPipe/issues/8034
Authored-by: coletdjnz
											
										
										
											2022-03-28 00:49:42 +00:00
+								                    [0-9]+\s*m(?:onths?)?,?\s*
-												[utils] Add support for zero years and months in parse_duration

											
										
										
											2017-10-29 00:04:48 +00:00
+								                )?
 								                (?:
-												[youtube:tab] Fix duration extraction for shorts (#3171)

Related: https://github.com/TeamNewPipe/NewPipe/issues/8034
Authored-by: coletdjnz
											
										
										
											2022-03-28 00:49:42 +00:00
+								                    [0-9]+\s*w(?:eeks?)?,?\s*
-												[utils] Add support for zero years and months in parse_duration

											
										
										
											2017-10-29 00:04:48 +00:00
+								                )?
-												[ntvde] Add new extractor (Fixes #4850)

											
										
										
											2015-02-02 20:48:54 +00:00
+								                (?:
-												[youtube:tab] Fix duration extraction for shorts (#3171)

Related: https://github.com/TeamNewPipe/NewPipe/issues/8034
Authored-by: coletdjnz
											
										
										
											2022-03-28 00:49:42 +00:00
+								                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
-												[ntvde] Add new extractor (Fixes #4850)

											
										
										
											2015-02-02 20:48:54 +00:00
+								                )?
-												[utils] Add support for zero years and months in parse_duration

											
										
										
											2017-10-29 00:04:48 +00:00
+								                T)?
-												[utils] improve parse_duration to handle more formats

											
										
										
											2016-04-07 18:30:47 +00:00
+								                (?:
-												[utils] Improve `parse_duration`

Authored by: bashonly

											
										
										
											2023-07-20 13:40:31 +00:00
+								                    (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
-												[utils] improve parse_duration to handle more formats

											
										
										
											2016-04-07 18:30:47 +00:00
+								                )?
 								                (?:
-												[youtube:tab] Fix duration extraction for shorts (#3171)

Related: https://github.com/TeamNewPipe/NewPipe/issues/8034
Authored-by: coletdjnz
											
										
										
											2022-03-28 00:49:42 +00:00
+								                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
-												[utils] improve parse_duration to handle more formats

											
										
										
											2016-04-07 18:30:47 +00:00
+								                )?
 								                (?:
 								                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
-												[utils] Improve parse_duration

											
										
										
											2017-01-26 16:23:08 +00:00
+								                )?Z?$''', s)
-												[utils] improve parse_duration to handle more formats

											
										
										
											2016-04-07 18:30:47 +00:00
+								        if m:
 								            days, hours, mins, secs, ms = m.groups()
 								        else:
-												[utils] Improve parse_duration

											
										
										
											2017-01-26 16:23:08 +00:00
+								            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
-												[utils] improve parse_duration to handle more formats

											
										
										
											2016-04-07 18:30:47 +00:00
+								            if m:
 								                hours, mins = m.groups()
 								            else:
 								                return None
 								    if ms:
-												[cleanup] Misc cleanup and refactor (#2173)

											
										
										
											2022-04-17 20:58:28 +00:00
+								        ms = ms.replace(':', '.')
 								    return sum(float(part or 0) * mult for part, mult in (
 								        (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
-												FFmpegMetadataPP; Write temporary file to `something.temp.{ext}` (fixes #2079)

ffmpeg correctly recognizes the formats of extensions like m4a, but it doesn't work if it's passed with the `--format` option.

											
										
										
											2014-01-03 11:52:27 +00:00
-												[core] Disallow unsafe extensions (CVE-2024-38519)

Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j

Authored by: Grub4K

											
										
										
											2024-07-01 22:52:50 +00:00
def _change_extension(prepend, filename, ext, expected_real_ext=None):
    """Build a new filename from `filename` using the extension `ext`.

    @param prepend            If truthy and `filename` already has an extension,
                              `ext` is inserted before that extension
                              (`name.ext.real_ext`) instead of replacing it.
    @param filename           The filename to modify.
    @param ext                The extension to add, without a leading dot.
    @param expected_real_ext  If given, the current extension is only stripped
                              when it equals this value (compared without the dot);
                              otherwise `.ext` is appended to the whole `filename`.
    @returns                  The resulting filename.

    The extension is passed through `_UnsafeExtensionError.sanitize_extension`
    (presumably rejecting unsafe extensions; see that class for details).
    """
    name, real_ext = os.path.splitext(filename)

    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        filename = name
        if prepend and real_ext:
            # Called for validation only; the original `ext` is what gets inserted
            _UnsafeExtensionError.sanitize_extension(ext, prepend=True)
            return f'{filename}.{ext}{real_ext}'

    return f'{filename}.{_UnsafeExtensionError.sanitize_extension(ext)}'
-												Move check_executable into a helper function

											
										
										
											2014-01-07 05:23:41 +00:00
-												[core] Disallow unsafe extensions (CVE-2024-38519)

Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j

Authored by: Grub4K

											
										
										
											2024-07-01 22:52:50 +00:00
 								prepend_extension = functools.partial(_change_extension, True)
 								replace_extension = functools.partial(_change_extension, False)
-												[utils] Add replace_extension

											
										
										
											2015-05-02 17:23:06 +00:00
-												Move check_executable into a helper function

											
										
										
											2014-01-07 05:23:41 +00:00
def check_executable(exe, args=()):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)

    Returns `exe` if the process could be started, or False if starting it
    raised OSError. The exit status of the process is not inspected. """
    try:
        Popen.run([exe, *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        return False
    return exe
-												Add infrastructure for paged lists

This commit allows to download pages in playlists as needed instead of all at once.
Before this commit,
    youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download
took quite some time - now it's almost instantaneous.
As an example, the youtube:user extractor has been converted.
Fixes #2175

											
										
										
											2014-01-20 10:36:47 +00:00
-												[cleanup] Misc

Closes #5471, Closes #5312

Authored by: pukkandan, Alienmaster

											
										
										
											2022-11-11 03:13:08 +00:00
def _get_exe_version_output(exe, args):
    """Run `exe` with `args` and return its text output (stderr merged into stdout).

    @returns  The captured output on success,
              None if the process finished with a truthy (non-zero) return code,
              False if the process could not be started (OSError).
    """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        stdout, _, ret = Popen.run([encodeArgument(exe), *args], text=True,
                                   stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        if ret:
            return None
    except OSError:
        return False
    return stdout
-												Improve and test ffmpeg version detection

											
										
										
											2014-12-14 20:59:59 +00:00
 								def detect_exe_version(output, version_re=None, unrecognized='present'):
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    assert isinstance(output, str)
-												Improve and test ffmpeg version detection

											
										
										
											2014-12-14 20:59:59 +00:00
+								    if version_re is None:
 								        version_re = r'version\s+([-0-9._a-zA-Z]+)'
 								    m = re.search(version_re, output)
-												[ffmpeg] Move version detection to utils

											
										
										
											2014-11-02 09:50:30 +00:00
+								    if m:
 								        return m.group(1)
 								    else:
 								        return unrecognized
-												[ffmpeg] Framework for feature detection
Related: #1502, #1237, https://github.com/ytdl-org/youtube-dl/pull/29581

											
										
										
											2021-11-03 18:53:48 +00:00
def get_exe_version(exe, args=('--version',),
                    version_re=None, unrecognized=('present', 'broken')):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    @param exe           The executable to run.
    @param args          Arguments that make the executable print its version.
    @param version_re    Regex passed on to `detect_exe_version`.
    @param unrecognized  One or two values: the first is returned when the
                         output does not match `version_re`; the last is
                         returned when the executable exits with an error.
    """
    unrecognized = variadic(unrecognized)
    assert len(unrecognized) in (1, 2)
    out = _get_exe_version_output(exe, args)
    if out is None:
        # The executable ran but reported failure
        return unrecognized[-1]
    # A falsy `out` (False when not present, or empty output) is returned as-is
    return out and detect_exe_version(out, version_re, unrecognized[0])
-												[ffmpeg] Framework for feature detection
Related: #1502, #1237, https://github.com/ytdl-org/youtube-dl/pull/29581

											
										
										
											2021-11-03 18:53:48 +00:00
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
def frange(start=0, stop=None, step=1):
    """Float range

    Works like `range`, but accepts non-integer values. When called with a
    single argument, it is treated as `stop` and counting starts from 0.
    A `step` of 0 yields nothing.
    """
    if stop is None:
        start, stop = 0, start
    # Multiplying by the sign of `step` lets one comparison serve both directions
    sign = [-1, 1][step > 0] if step else 0
    while sign * start < sign * stop:
        yield start
        start += step
-												[test] Add Python 3.10 (#480)

Authored-by: pukkandan, xtkoba
											
										
										
											2021-07-23 15:02:48 +00:00
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):  # noqa: A001
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        """
        @param iterable  The iterable to wrap; items are pulled only when needed.
        @param reverse   Whether the list should behave as if reversed.
        @param _cache    Existing list of already-evaluated items to share (internal).
        """
        self._iterable = iter(iterable)
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self._cache
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        """Pull all remaining items into the cache and return the cache (in original order)"""
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        """Map an index of the reversed list onto the underlying cache (None is kept)"""
        return None if x is None else ~x

    def __getitem__(self, idx):
        """Return the item (for an int) or a plain list (for a slice) at `idx`.

        Only as much of the iterable as is needed gets evaluated.
        Raises LazyList.IndexError for an out-of-range integer index
        and TypeError for any other kind of index.
        """
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')

        needs_everything = (
            (start or 0) < 0 or (stop or 0) < 0
            or (start is None and step < 0)
            or (stop is None and step > 0))
        if needs_everything:
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
        else:
            # Evaluate just enough items to cover the largest requested index
            n = max(start or 0, stop or 0) - len(self._cache) + 1
            if n > 0:
                self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        # Fetching a single item is enough to know whether the list is empty
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        # The new object shares the iterable and the cache with this one
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
-												[utils] Add `LazyList`

											
										
										
											2021-05-28 16:49:13 +00:00
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								class PagedList:
-												[utils] Fix `PagedList`
Bug in d8cf8d97a8dbc9602556de474af133b5ab0e0a29

											
										
										
											2021-11-19 15:15:52 +00:00
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    class IndexError(IndexError):  # noqa: A001
-												[utils] Fix `PagedList`
Bug in d8cf8d97a8dbc9602556de474af133b5ab0e0a29

											
										
										
											2021-11-19 15:15:52 +00:00
+								        pass
-												Add __len__ to PagedLists

											
										
										
											2014-01-22 20:43:33 +00:00
+								    def __len__(self):
 								        # This is only useful for tests
 								        return len(self.getslice())
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								    def __init__(self, pagefunc, pagesize, use_cache=True):
 								        self._pagefunc = pagefunc
 								        self._pagesize = pagesize
-												[utils] OnDemandPagedList: Do not download pages after error

											
										
										
											2022-03-02 21:12:52 +00:00
+								        self._pagecount = float('inf')
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								        self._use_cache = use_cache
 								        self._cache = {}
 								    def getpage(self, pagenum):
-												[utils] Fix `PagedList`

											
										
										
											2021-11-16 15:44:02 +00:00
+								        page_results = self._cache.get(pagenum)
 								        if page_results is None:
-												[utils] OnDemandPagedList: Do not download pages after error

											
										
										
											2022-03-02 21:12:52 +00:00
+								            page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								        if self._use_cache:
 								            self._cache[pagenum] = page_results
 								        return page_results
 								    def getslice(self, start=0, end=None):
 								        return list(self._getslice(start, end))
 								    def _getslice(self, start, end):
-												[utils] Add `__getitem__` for `PagedList`

											
										
										
											2021-05-17 13:44:20 +00:00
+								        raise NotImplementedError('This method must be implemented by subclasses')
 								    def __getitem__(self, idx):
-												[utils] OnDemandPagedList: Do not download pages after error

											
										
										
											2022-03-02 21:12:52 +00:00
+								        assert self._use_cache, 'Indexing PagedList requires cache'
-												[utils] Add `__getitem__` for `PagedList`

											
										
										
											2021-05-17 13:44:20 +00:00
+								        if not isinstance(idx, int) or idx < 0:
 								            raise TypeError('indices must be non-negative integers')
 								        entries = self.getslice(idx, idx + 1)
-												[utils] Fix `PagedList`

											
										
										
											2021-11-16 15:44:02 +00:00
+								        if not entries:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            raise self.IndexError
-												[utils] Fix `PagedList`

											
										
										
											2021-11-16 15:44:02 +00:00
+								        return entries[0]
-												[utils] Add `__getitem__` for `PagedList`

											
										
										
											2021-05-17 13:44:20 +00:00
-												[cleanup] Misc (#8598)

Authored by: bashonly, pukkandan, seproDev, Grub4K

Co-authored-by: bashonly <bashonly@protonmail.com>
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
											
										
										
											2023-12-30 21:27:36 +00:00
+								    def __bool__(self):
 								        return bool(self.getslice(0, 1))
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
 								class OnDemandPagedList(PagedList):
-												[cleanup] Misc fixes

Closes https://github.com/yt-dlp/yt-dlp/pull/3213, Closes https://github.com/yt-dlp/yt-dlp/pull/3117

Related: https://github.com/yt-dlp/yt-dlp/issues/3146#issuecomment-1077323114, https://github.com/yt-dlp/yt-dlp/pull/3277#discussion_r841019671, https://github.com/yt-dlp/yt-dlp/commit/a825ffbffa0bea322e3ccb44c6f8e01d8d9572fb#commitcomment-68538986, https://github.com/yt-dlp/yt-dlp/issues/2360, https://github.com/yt-dlp/yt-dlp/commit/5fa3c9a88f597625296981a4a26be723e65d4842#r70393519, https://github.com/yt-dlp/yt-dlp/commit/5fa3c9a88f597625296981a4a26be723e65d4842#r70393254

											
										
										
											2022-03-27 02:20:43 +00:00
+								    """Download pages until a page with less than maximum results"""
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								    def _getslice(self, start, end):
-												Add infrastructure for paged lists

This commit allows downloading pages in playlists as needed instead of all at once.
Before this commit,
    youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download
took quite some time - now it's almost instantaneous.
As an example, the youtube:user extractor has been converted.
Fixes #2175

											
										
										
											2014-01-20 10:36:47 +00:00
+								        for pagenum in itertools.count(start // self._pagesize):
 								            firstid = pagenum * self._pagesize
 								            nextfirstid = pagenum * self._pagesize + self._pagesize
 								            if start >= nextfirstid:
 								                continue
 								            startv = (
 								                start % self._pagesize
 								                if firstid <= start < nextfirstid
 								                else 0)
 								            endv = (
 								                ((end - 1) % self._pagesize) + 1
 								                if (end is not None and firstid <= end <= nextfirstid)
 								                else None)
-												[utils] OnDemandPagedList: Do not download pages after error

											
										
										
											2022-03-02 21:12:52 +00:00
+								            try:
 								                page_results = self.getpage(pagenum)
 								            except Exception:
 								                self._pagecount = pagenum - 1
 								                raise
-												Add infrastructure for paged lists

This commit allows downloading pages in playlists as needed instead of all at once.
Before this commit,
    youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download
took quite some time - now it's almost instantaneous.
As an example, the youtube:user extractor has been converted.
Fixes #2175

											
										
										
											2014-01-20 10:36:47 +00:00
+								            if startv != 0 or endv is not None:
 								                page_results = page_results[startv:endv]
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								            yield from page_results
-												Add infrastructure for paged lists

This commit allows to download pages in playlists as needed instead of all at once.
Before this commit,
    youtube-dl http://www.youtube.com/user/ANNnewsCH/videos --playlist-end 2 --skip-download
took quite some time - now it's almost instantaneous.
As an example, the youtube:user extractor has been converted.
Fixes #2175

											
										
										
											2014-01-20 10:36:47 +00:00
 								            # A little optimization - if current page is not "full", ie. does
 								            # not contain page_size videos then we can assume that this page
 								            # is the last one - there are no more ids on further pages -
 								            # i.e. no need to query again.
 								            if len(page_results) + startv < self._pagesize:
 								                break
 								            # If we got the whole page, but the next page is not interesting,
 								            # break out early as well
 								            if end == nextfirstid:
 								                break
-												[youtube] Correct invalid JSON (Fixes #2353)

											
										
										
											2014-02-09 16:56:10 +00:00
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
+								class InAdvancePagedList(PagedList):
-												[cleanup] Misc fixes

Closes https://github.com/yt-dlp/yt-dlp/pull/3213, Closes https://github.com/yt-dlp/yt-dlp/pull/3117

Related: https://github.com/yt-dlp/yt-dlp/issues/3146#issuecomment-1077323114, https://github.com/yt-dlp/yt-dlp/pull/3277#discussion_r841019671, https://github.com/yt-dlp/yt-dlp/commit/a825ffbffa0bea322e3ccb44c6f8e01d8d9572fb#commitcomment-68538986, https://github.com/yt-dlp/yt-dlp/issues/2360, https://github.com/yt-dlp/yt-dlp/commit/5fa3c9a88f597625296981a4a26be723e65d4842#r70393519, https://github.com/yt-dlp/yt-dlp/commit/5fa3c9a88f597625296981a4a26be723e65d4842#r70393254

											
										
										
											2022-03-27 02:20:43 +00:00
+								    """PagedList with total number of pages known in advance"""
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
+								    def __init__(self, pagefunc, pagecount, pagesize):
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								        PagedList.__init__(self, pagefunc, pagesize, True)
-												[utils] OnDemandPagedList: Do not download pages after error

											
										
										
											2022-03-02 21:12:52 +00:00
+								        self._pagecount = pagecount
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								    def _getslice(self, start, end):
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
+								        start_page = start // self._pagesize
-												Fix/improve `InAdvancePagedList`

											
										
										
											2022-01-23 17:55:17 +00:00
+								        end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
+								        skip_elems = start - start_page * self._pagesize
 								        only_more = None if end is None else end - start
 								        for pagenum in range(start_page, end_page):
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								            page_results = self.getpage(pagenum)
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
+								            if skip_elems:
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								                page_results = page_results[skip_elems:]
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
+								                skip_elems = None
 								            if only_more is not None:
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								                if len(page_results) < only_more:
 								                    only_more -= len(page_results)
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
+								                else:
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								                    yield from page_results[:only_more]
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
+								                    break
-												[utils] Fix `InAdvancePagedList.__getitem__`

Since it didn't have any cache, the page was re-fetched for each video.
* Also generalized the cache code

											
										
										
											2021-08-09 22:10:40 +00:00
+								            yield from page_results
-												[vimeo:likes] Support large like lists (Fixes #3847)

											
										
										
											2014-09-28 22:36:06 +00:00
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								class PlaylistEntries:
 								    MissingEntry = object()
 								    is_exhausted = False
 								    def __init__(self, ydl, info_dict):
-												Add option `--lazy-playlist` to process entries as they are received

											
										
										
											2022-06-17 08:05:04 +00:00
+								        self.ydl = ydl
 								        # _entries must be assigned now since infodict can change during iteration
 								        entries = info_dict.get('entries')
 								        if entries is None:
 								            raise EntryNotInPlaylist('There are no entries')
 								        elif isinstance(entries, list):
 								            self.is_exhausted = True
 								        requested_entries = info_dict.get('requested_entries')
-												Fix bugs in `PlaylistEntries`

											
										
										
											2022-11-11 17:33:26 +00:00
+								        self.is_incomplete = requested_entries is not None
-												Add option `--lazy-playlist` to process entries as they are received

											
										
										
											2022-06-17 08:05:04 +00:00
+								        if self.is_incomplete:
 								            assert self.is_exhausted
-												Fix bugs in `PlaylistEntries`

											
										
										
											2022-11-11 17:33:26 +00:00
+								            self._entries = [self.MissingEntry] * max(requested_entries or [0])
-												Add option `--lazy-playlist` to process entries as they are received

											
										
										
											2022-06-17 08:05:04 +00:00
+								            for i, entry in zip(requested_entries, entries):
 								                self._entries[i - 1] = entry
 								        elif isinstance(entries, (list, PagedList, LazyList)):
 								            self._entries = entries
 								        else:
 								            self._entries = LazyList(entries)
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
 								    # Grammar of a single comma-separated segment: [START][(:|-)[END][:STEP]]
 								    # START and STEP are optionally signed integers; END may also be `inf`/`infinite`.
 								    # The `range` group is set whenever the `:`/`-` separator is present.
 								    PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
 								        (?P<start>[+-]?\d+)?
 								        (?P<range>[:-]
 								            (?P<end>[+-]?\d+|inf(?:inite)?)?
 								            (?::(?P<step>[+-]?\d+))?
 								        )?''')
 								    @classmethod
 								    def parse_playlist_items(cls, string):
 								        """Lazily parse a comma-separated item specification
 								        Yields an `int` for a segment without a range separator and a `slice`
 								        (start, end and step converted by `int_or_none`/`float_or_none`) otherwise.
 								        Raises ValueError for an empty segment, a segment that does not fully
 								        match `PLAYLIST_ITEMS_RE`, or a step of zero.
 								        """
 								        for spec in string.split(','):
 								            if not spec:
 								                raise ValueError('There is two or more consecutive commas')
 								            matched = cls.PLAYLIST_ITEMS_RE.fullmatch(spec)
 								            if matched is None:
 								                raise ValueError(f'{spec!r} is not a valid specification')
 								            first, last, stride, is_range = matched.group('start', 'end', 'step', 'range')
 								            if int_or_none(stride) == 0:
 								                raise ValueError(f'Step in {spec!r} cannot be zero')
 								            if not is_range:
 								                # No separator matched, so the segment is a plain index
 								                yield int(first)
 								                continue
 								            yield slice(int_or_none(first), float_or_none(last), int_or_none(stride))
 								    def get_requested_items(self):
 								        playlist_items = self.ydl.params.get('playlist_items')
 								        playlist_start = self.ydl.params.get('playliststart', 1)
 								        playlist_end = self.ydl.params.get('playlistend')
 								        # For backwards compatibility, interpret -1 as whole list
 								        if playlist_end in (-1, None):
 								            playlist_end = ''
 								        if not playlist_items:
 								            playlist_items = f'{playlist_start}:{playlist_end}'
 								        elif playlist_start != 1 or playlist_end:
 								            self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
 								        for index in self.parse_playlist_items(playlist_items):
 								            for i, entry in self[index]:
 								                yield i, entry
-												Fix playlist error handling

Bug in 7e88d7d78f452ea69f06bbdf23f82e9ad7c3de5e

											
										
										
											2022-06-22 03:09:14 +00:00
+								                if not entry:
 								                    continue
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								                try:
-												Fix `--break-on-existing` with `--lazy-playlist`

Closes #6399

											
										
										
											2023-03-03 18:29:00 +00:00
+								                    # The item may have just been added to archive. Don't break due to it
 								                    if not self.ydl.params.get('lazy_playlist'):
 								                        # TODO: Add auto-generated fields
 								                        self.ydl._match_entry(entry, incomplete=True, silent=True)
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								                except (ExistingVideoReached, RejectedVideoReached):
 								                    return
-												Add option `--lazy-playlist` to process entries as they are received

											
										
										
											2022-06-17 08:05:04 +00:00
+								    def get_full_count(self):
 								        if self.is_exhausted and not self.is_incomplete:
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								            return len(self)
 								        elif isinstance(self._entries, InAdvancePagedList):
 								            if self._entries._pagesize == 1:
 								                return self._entries._pagecount
 								    @functools.cached_property
 								    def _getter(self):
 								        if isinstance(self._entries, list):
 								            def get_entry(i):
 								                try:
 								                    entry = self._entries[i]
 								                except IndexError:
 								                    entry = self.MissingEntry
 								                    if not self.is_incomplete:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                        raise self.IndexError
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								                if entry is self.MissingEntry:
-												Fix bugs in `PlaylistEntries`

											
										
										
											2022-11-11 17:33:26 +00:00
+								                    raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								                return entry
 								        else:
 								            def get_entry(i):
 								                try:
 								                    return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
 								                except (LazyList.IndexError, PagedList.IndexError):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                    raise self.IndexError
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								        return get_entry
 								    def __getitem__(self, idx):
 								        if isinstance(idx, int):
 								            idx = slice(idx, idx)
 								        # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
 								        step = 1 if idx.step is None else idx.step
 								        if idx.start is None:
 								            start = 0 if step > 0 else len(self) - 1
 								        else:
 								            start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
 								        # NB: Do not call len(self) when idx == [:]
 								        if idx.stop is None:
 								            stop = 0 if step < 0 else float('inf')
 								        else:
 								            stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
 								        stop += [-1, 1][step > 0]
 								        for i in frange(start, stop, step):
 								            if i < 0:
 								                continue
 								            try:
-												Add option `--lazy-playlist` to process entries as they are received

											
										
										
											2022-06-17 08:05:04 +00:00
+								                entry = self._getter(i)
 								            except self.IndexError:
 								                self.is_exhausted = True
 								                if step > 0:
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								                    break
-												Add option `--lazy-playlist` to process entries as they are received

											
										
										
											2022-06-17 08:05:04 +00:00
+								                continue
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								            yield i + 1, entry
 								    def __len__(self):
 								        # Count by iterating the full `[:]` slice, which walks every entry
 								        return sum(1 for _ in self[:])
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    class IndexError(IndexError):  # noqa: A001
-												Add slicing notation to `--playlist-items`

* Adds support for negative indices and step
* Add `-I` as alias for `--playlist-index`
* Deprecates `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse`

Closes #2951, Closes #2853

											
										
										
											2022-06-17 04:48:21 +00:00
+								        pass
-												[youtube] Correct invalid JSON (Fixes #2353)

											
										
										
											2014-02-09 16:56:10 +00:00
+								def uppercase_escape(s):
-												Fix unicode_escape (Fixes #2695)

											
										
										
											2014-04-04 21:00:51 +00:00
+								    unicode_escape = codecs.getdecoder('unicode_escape')
-												[youtube] Correct invalid JSON (Fixes #2353)

											
										
										
											2014-02-09 16:56:10 +00:00
+								    return re.sub(
-												[utils] Correct decoding of large unicode codepoints in uppercase_escape (Fixes #2664)

											
										
										
											2014-04-01 11:17:07 +00:00
+								        r'\\U[0-9a-fA-F]{8}',
-												Fix unicode_escape (Fixes #2695)

											
										
										
											2014-04-04 21:00:51 +00:00
+								        lambda m: unicode_escape(m.group(0))[0],
 								        s)
-												[NBC] Enhance embedURL extraction (closes #2549)

											
										
										
											2015-05-04 13:53:05 +00:00
 								def lowercase_escape(s):
 								    """Replace each lowercase ``\\uXXXX`` escape (exactly four hex digits) in `s` with its character"""
 								    decode = codecs.getdecoder('unicode_escape')
 								    def _unescape(match):
 								        # The decoder returns a (text, consumed_length) pair; only the text is needed
 								        decoded, _ = decode(match.group(0))
 								        return decoded
 								    return re.sub(r'\\u[0-9a-fA-F]{4}', _unescape, s)
-												Fix f4m downloading on Python 2.6

											
										
										
											2014-02-15 15:24:43 +00:00
-												[YoutubeDL/utils] Clarify rationale for URL escaping in comment, move escape routines to utils and add some tests

											
										
										
											2014-09-13 13:59:16 +00:00
-												[extractor/telegram] Add playlist support and more metadata (#5358)

Authored by: bashonly, bsun0000
											
										
										
											2022-11-06 19:05:09 +00:00
+								def parse_qs(url, **kwargs):
 								    return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)
-												[utils] Add `parse_qs`

											
										
										
											2021-08-22 19:02:00 +00:00
-												Ignore BOM in batch files (Fixes #2450)

											
										
										
											2014-02-25 00:43:17 +00:00
+								def read_batch_urls(batch_fd):
 								    def fixup(url):
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								        if not isinstance(url, str):
-												Ignore BOM in batch files (Fixes #2450)

											
										
										
											2014-02-25 00:43:17 +00:00
+								            url = url.decode('utf-8', 'replace')
-												batch-file enumeration improvements (https://github.com/ytdl-org/youtube-dl/pull/26813)

Co-authored by: glenn-slayden
Modified from https://github.com/ytdl-org/youtube-dl/pull/26813/commits/c9a9ccf8a35e157e22afeaafc2851176ddd87e68

These improvements apply to reading the list of URLs from the file supplied via the `--batch-file` (`-a`) command line option.

1. Skip blank and empty lines in the file. Currently, lines with leading whitespace are only skipped when that whitespace is followed by a comment character (`#`, `;`, or `]`). This means that empty lines and lines consisting only of whitespace are returned as (trimmed) empty strings in the list of URLs to process.

2. [bug fix] Detect and remove the Unicode BOM when the file descriptor is already decoding Unicode.

With Python 3, the `batch_fd` enumerator returns the lines of the file as Unicode. For UTF-8, this means that the raw BOM bytes from the file `\xef \xbb \xbf` show up converted into a single `\ufeff` character prefixed to the first enumerated text line.

This fix solves several buggy interactions between the presence of BOM, the skipping of comments and/or blank lines, and ensuring the list of URLs is consistently trimmed. For example, if the first line of the file is blank, the BOM is incorrectly returned as a URL standing alone. If the first line contains a URL, it will be prefixed with this unwanted single character--but note that its being there will have inhibited the proper trimming of any leading whitespace. Currently, the `UnicodeBOMIE` helper attempts to recover from some of these error cases, but this fix prevents the error from happening in the first place (at least on Python3). In any case, the `UnicodeBOMIE` approach is flawed, because it is clearly illogical for a BOM to appear in the (non-batch) URL(s) specified directly on the command line (and for that matter, on URLs *after the first line* of a batch list, also)

3. Adds proper trimming of the " #" into the read_batch_urls processing so that the URLs it enumerates are cleaned and trimmed more consistently.

											
										
										
											2021-01-09 12:38:03 +00:00
+								        BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
 								        for bom in BOM_UTF8:
 								            if url.startswith(bom):
 								                url = url[len(bom):]
 								        url = url.lstrip()
 								        if not url or url.startswith(('#', ';', ']')):
-												Ignore BOM in batch files (Fixes #2450)

											
										
										
											2014-02-25 00:43:17 +00:00
+								            return False
-												batch-file enumeration improvements (https://github.com/ytdl-org/youtube-dl/pull/26813)

Co-authored by: glenn-slayden
Modified from https://github.com/ytdl-org/youtube-dl/pull/26813/commits/c9a9ccf8a35e157e22afeaafc2851176ddd87e68

These improvements apply to reading the list of URLs from the file supplied via the `--batch-file` (`-a`) command line option.

1. Skip blank and empty lines in the file. Currently, lines with leading whitespace are only skipped when that whitespace is followed by a comment character (`#`, `;`, or `]`). This means that empty lines and lines consisting only of whitespace are returned as (trimmed) empty strings in the list of URLs to process.

2. [bug fix] Detect and remove the Unicode BOM when the file descriptor is already decoding Unicode.

With Python 3, the `batch_fd` enumerator returns the lines of the file as Unicode. For UTF-8, this means that the raw BOM bytes from the file `\xef \xbb \xbf` show up converted into a single `\ufeff` character prefixed to the first enumerated text line.

This fix solves several buggy interactions between the presence of BOM, the skipping of comments and/or blank lines, and ensuring the list of URLs is consistently trimmed. For example, if the first line of the file is blank, the BOM is incorrectly returned as a URL standing alone. If the first line contains a URL, it will be prefixed with this unwanted single character--but note that its being there will have inhibited the proper trimming of any leading whitespace. Currently, the `UnicodeBOMIE` helper attempts to recover from some of these error cases, but this fix prevents the error from happening in the first place (at least on Python3). In any case, the `UnicodeBOMIE` approach is flawed, because it is clearly illogical for a BOM to appear in the (non-batch) URL(s) specified directly on the command line (and for that matter, on URLs *after the first line* of a batch list, also)

3. Adds proper trimming of the " #" into the read_batch_urls processing so that the URLs it enumerates are cleaned and trimmed more consistently.

											
										
										
											2021-01-09 12:38:03 +00:00
+								        # "#" cannot be stripped out since it is part of the URI
-												[cleanup] Fix some typos (#4194)

Authored by: crazymoose77756
											
										
										
											2022-06-27 00:50:06 +00:00
+								        # However, it can be safely stripped out if following a whitespace
-												[misc] Cleanup (#9765)

Closes #9763
Authored by: bashonly, seproDev, Grub4K

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
											
										
										
											2024-05-26 21:37:49 +00:00
+								        return re.split(r'\s#', url, maxsplit=1)[0].rstrip()
-												Ignore BOM in batch files (Fixes #2450)

											
										
										
											2014-02-25 00:43:17 +00:00
 								    with contextlib.closing(batch_fd) as fd:
 								        return [url for url in map(fixup, fd) if url]
-												[facebook] Fix login process

It was broken and didn't work in python 3.
And use `_download_webpage` instead of `compat_urllib_request.urlopen`.

											
										
										
											2014-03-07 14:25:33 +00:00
 								def urlencode_postdata(*args, **kargs):
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    return urllib.parse.urlencode(*args, **kargs).encode('ascii')
-												[generic] Suppress warning about doctypes in RSS parser

											
										
										
											2014-03-10 16:31:32 +00:00
-												Update to ytdl-commit-2dd6c6e

[YouTube] Avoid crash if uploader_id extraction fails
https://github.com/ytdl-org/youtube-dl/commit/2dd6c6edd8e0fc5e45865b8e6d865e35147de772

Except:
    * 295736c9cba714fb5de7d1c3dd31d86e50091cf8 [jsinterp] Improve parsing
    * 384f632e8a9b61e864a26678d85b2b39933b9bae [ITV] Overhaul ITV extractor
    * 33db85c571304bbd6863e3407ad8d08764c9e53b [feat]: Add support to external downloader aria2p

											
										
										
											2023-02-17 11:21:34 +00:00
def update_url(url, *, query_update=None, **kwargs):
    """Replace URL components specified by kwargs
       @param url           str or parsed URL tuple (as returned by urllib.parse.urlparse)
       @param query_update  mapping merged over the existing query parameters
       @returns             str
    """
    if isinstance(url, str):
        # Nothing to change: hand the string back without a parse/unparse round trip
        if not (kwargs or query_update):
            return url
        url = urllib.parse.urlparse(url)

    if query_update:
        assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
        existing_params = urllib.parse.parse_qs(url.query)
        # Later keys win, so values from query_update override existing ones
        combined_params = {**existing_params, **query_update}
        kwargs['query'] = urllib.parse.urlencode(combined_params, True)

    replaced = url._replace(**kwargs)
    return urllib.parse.urlunparse(replaced)
-												[utils] add update_url_query function

											
										
										
											2016-03-03 17:34:52 +00:00
def update_url_query(url, query):
    """Return url with the parameters in query merged into its query string

    Thin wrapper around update_url(url, query_update=query)
    """
    updated = update_url(url, query_update=query)
    return updated
-												[utils] Add encode_dict

											
										
										
											2015-09-06 01:22:20 +00:00
-												[utils] Add encode_compat_str

											
										
										
											2015-12-20 00:26:26 +00:00
-												[utils] Rename try_multipart_encode to _multipart_encode_impl

To state that this is an internal function and people should be careful
when using it outside youtube-dl.

											
										
										
											2017-05-06 11:06:18 +00:00
+								def _multipart_encode_impl(data, boundary):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    content_type = f'multipart/form-data; boundary={boundary}'
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
 								    out = b''
 								    for k, v in data.items():
 								        out += b'--' + boundary.encode('ascii') + b'\r\n'
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								        if isinstance(k, str):
-												[cleanup] Minor fixes (See desc)

* [youtube] Fix `--youtube-skip-dash-manifest`
* [build] Use `$()` in `Makefile`. Closes #3684
* Fix bug in 385ffb467b2285e85a2a5495b90314ba1f8e0700
* Fix bug in 43d7f5a5d0c77556156a3f8caa6976d3908a1e38
* [cleanup] Remove unnecessary `utf-8` from `str.encode`/`bytes.decode`
* [utils] LazyList: Expose unnecessarily "protected" attributes
and other minor cleanup

											
										
										
											2022-05-09 11:54:28 +00:00
+								            k = k.encode()
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								        if isinstance(v, str):
-												[cleanup] Minor fixes (See desc)

* [youtube] Fix `--youtube-skip-dash-manifest`
* [build] Use `$()` in `Makefile`. Closes #3684
* Fix bug in 385ffb467b2285e85a2a5495b90314ba1f8e0700
* Fix bug in 43d7f5a5d0c77556156a3f8caa6976d3908a1e38
* [cleanup] Remove unnecessary `utf-8` from `str.encode`/`bytes.decode`
* [utils] LazyList: Expose unnecessarily "protected" attributes
and other minor cleanup

											
										
										
											2022-05-09 11:54:28 +00:00
+								            v = v.encode()
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
+								        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
 								        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
-												[utils] Fix multipart_encode for Python < 3.5

											
										
										
											2017-05-05 12:51:59 +00:00
+								        content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
+								        if boundary.encode('ascii') in content:
 								            raise ValueError('Boundary overlaps with data')
 								        out += content
 								    out += b'--' + boundary.encode('ascii') + b'--\r\n'
 								    return out, content_type
 								def multipart_encode(data, boundary=None):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    """
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
+								    Encode a dict to RFC 7578-compliant form-data
 								    data:
 								        A dict where keys and values can be either Unicode or bytes-like
 								        objects.
 								    boundary:
 								        If specified a Unicode object, it's used as the boundary. Otherwise
 								        a random boundary is generated.
 								    Reference: https://tools.ietf.org/html/rfc7578
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    """
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
+								    has_specified_boundary = boundary is not None
 								    while True:
 								        if boundary is None:
 								            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
 								        try:
-												[utils] Rename try_multipart_encode to _multipart_encode_impl

To state that this is an internal function and people should be careful
when using it outside youtube-dl.

											
										
										
											2017-05-06 11:06:18 +00:00
+								            out, content_type = _multipart_encode_impl(data, boundary)
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
+								            break
 								        except ValueError:
 								            if has_specified_boundary:
 								                raise
 								            boundary = None
 								    return out, content_type
-												[utils] `traverse_obj`: More fixes (#6959)

- Fix result when branching with `traverse_string`
- Fix `slice` path on `dict`s
- Fix tests and docstrings from 21b5ec86c2c37d10c5bb97edd7051d3aac16bb3e
- Add `is_iterable_like` helper function

Authored by: Grub4K
											
										
										
											2023-04-30 17:50:22 +00:00
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Check whether x is an instance of allowed_types but not of blocked_types

    When blocked_types is not given, str, bytes and mappings are blocked
    """
    if blocked_types is NO_DEFAULT:
        blocked_types = (str, bytes, collections.abc.Mapping)
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, blocked_types)
 								def variadic(x, allowed_types=NO_DEFAULT):
-												Update to ytdl-commit-d1c6c5

[YouTube] [core] Improve platform debug log, based on yt-dlp
https://github.com/ytdl-org/youtube-dl/commit/d1c6c5c4d618fa950813c0c71aede34a5ac851e9

Except:
    * 6ed34338285f722d0da312ce0af3a15a077a3e2a [jsinterp] Add short-cut evaluation for common expression
        * There was no performance improvement when tested with https://github.com/ytdl-org/youtube-dl/issues/30641
    * e8de54bce50f6f77a4d7e8e80675f7003d5bf630 [core] Handle `/../` sequences in HTTP URLs
        * We plan to implement this differently

											
										
										
											2023-05-24 18:00:43 +00:00
+								    if not isinstance(allowed_types, (tuple, type)):
 								        deprecation_warning('allowed_types should be a tuple or a type')
 								        allowed_types = tuple(allowed_types)
-												[cleanup] Misc

Closes #7030, closes #6967

											
										
										
											2023-05-19 21:36:23 +00:00
+								    return x if is_iterable_like(x, blocked_types=allowed_types) else (x, )
-												[cleanup] Misc (#5044)

Authored by: gamer191, pukkandan
											
										
										
											2022-10-04 04:23:11 +00:00
-												[utils] Add `try_call`

											
										
										
											2022-03-31 07:49:16 +00:00
def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each function in turn and return the first acceptable result

    Every function is called as f(*args, **kwargs). A function that raises
    one of the tolerated lookup/arithmetic errors is skipped, as is a result
    that is not an instance of expected_type (when given).
    Returns None if no function yields an acceptable result.
    """
    # NB: the default args/kwargs are only unpacked, never mutated
    tolerated_errors = (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError)
    for func in funcs:
        try:
            result = func(*args, **kwargs)
        except tolerated_errors:
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
    return None
 								def try_get(src, getter, expected_type=None):
 								    return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
-												[utils] Add try_get

To reduce boilerplate when accessing JSON

											
										
										
											2016-06-11 23:05:34 +00:00
-												[utils] Add `filter_dict`

											
										
										
											2022-03-28 02:51:45 +00:00
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict with the items of dct for which cndn(key, value) is truthy

    By default, items whose value is None are dropped
    """
    kept = {}
    for key, value in dct.items():
        if cndn(key, value):
            kept[key] = value
    return kept
-												[utils] Introduce merge_dicts

											
										
										
											2018-04-27 19:47:17 +00:00
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to earlier ones

    A key takes the first non-None value seen. The only override allowed is
    replacing an already stored empty string with a later str value.
    """
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is not None and key not in merged:
                is_new_value = True
            else:
                # The stored value is only inspected for str candidates; with a
                # str (never None) the branch above already handled missing keys
                is_new_value = isinstance(value, str) and merged[key] == ''
            if is_new_value:
                merged[key] = value
    return merged
-												[utils] Add encode_compat_str

											
										
										
											2015-12-20 00:26:26 +00:00
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return string as a str, decoding it with encoding/errors if it is not one already

    Note that the default encoding is evaluated once, when the function is defined
    """
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
-												[utils] Add encode_compat_str

											
										
										
											2015-12-20 00:26:26 +00:00
-												[utils] Add encode_dict

											
										
										
											2015-09-06 01:22:20 +00:00
-												[pbs] Add support for video ratings

											
										
										
											2014-03-20 23:59:51 +00:00
# US rating labels mapped to the age limit that parse_age_limit() returns for them.
# Keys are upper-case because parse_age_limit() upper-cases its input before the lookup
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
-												[washingtonpost] Add extractor (Fixes #2622)

											
										
										
											2014-03-24 22:21:20 +00:00
-												[utils] Add support TV Parental Guidelines ratings in parse_age_limit

											
										
										
											2016-08-07 13:45:18 +00:00
# TV Parental Guidelines labels mapped to the age limit that parse_age_limit() returns.
# parse_age_limit() builds its regex from the part of each key after the 'TV-' prefix
# and also accepts 'TV_' or no separator in its input
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
-												[utils] Add parse_age_limit

											
										
										
											2014-10-03 12:37:25 +00:00
def parse_age_limit(s):
    """Convert an age rating into an integer age limit

    Accepted inputs:
      * an int between 0 and 21 (inclusive), returned as-is
      * a str of one or two digits with an optional trailing '+'
      * a key of US_RATINGS (case-insensitive)
      * a TV Parental Guidelines label such as 'TV-14', 'TV_MA' or 'TVPG'
        (case-insensitive)

    Returns None for anything else
    """
    # isinstance(False, int) is True. So type() must be used instead
    if type(s) is int:  # noqa: E721
        if 0 <= s <= 21:
            return s
        return None
    if not isinstance(s, str):
        return None

    numeric = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if numeric:
        return int(numeric.group('age'))

    label = s.upper()
    if label in US_RATINGS:
        return US_RATINGS[label]

    # Alternation of the label parts following the 'TV-' prefix
    tv_suffixes = '|'.join(key[3:] for key in TV_PARENTAL_GUIDELINES)
    tv_match = re.match(rf'^TV[_-]?({tv_suffixes})$', label)
    if tv_match is None:
        return None
    return TV_PARENTAL_GUIDELINES['TV-' + tv_match.group(1)]
-												[utils] Add parse_age_limit

											
										
										
											2014-10-03 12:37:25 +00:00
-												[washingtonpost] Add extractor (Fixes #2622)

											
										
										
											2014-03-24 22:21:20 +00:00
def strip_jsonp(code):
    """Strip a JSONP wrapper and return only the callback argument

    Handles an optional `window.` prefix, an optional `name && name` guard,
    an empty function name, a trailing `;` and trailing `//` comments.
    Input that does not look like JSONP is returned unchanged.
    """
    jsonp_pattern = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    payload_only = r'\g<callback_data>'
    return re.sub(jsonp_pattern, payload_only, code)
-												[clubic] Add extractor (Fixes #2773)

											
										
										
											2014-04-21 05:12:02 +00:00
-												[jsinterp] Handle new youtube signature functions

Closes #4635

											
										
										
											2022-08-13 23:21:54 +00:00
+								def js_to_json(code, vars={}, *, strict=False):
-												[TrovoLive] Add extractor (partially fix #20)

Only VOD extractor has been implemented

Related: https://github.com/ytdl-org/youtube-dl/issues/26125
Related: https://github.com/blackjack4494/yt-dlc/issues/220

											
										
										
											2021-01-19 19:05:50 +00:00
+								    # vars is a dict of var, val pairs to substitute
-												[utils] `js_to_json`: Implement template strings (#6623)

Authored by: Grub4K
											
										
										
											2023-03-25 18:41:28 +00:00
+								    STRING_QUOTES = '\'"`'
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								    STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
-												[utils] Improve `js_to_json` comment regex
Capture the newline character as part of a single-line comment

From #497, Authored by: fstirlitz

											
										
										
											2021-07-13 07:18:20 +00:00
+								    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
-												[utils] Improve comments processing in js_to_json (closes #11947)

											
										
										
											2017-02-02 19:55:06 +00:00
+								    INTEGER_TABLE = (
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
 								        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
-												[utils] Improve comments processing in js_to_json (closes #11947)

											
										
										
											2017-02-02 19:55:06 +00:00
+								    )
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								    def process_escape(match):
 								        JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
 								        escape = match.group(1) or match.group(2)
 								        return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
 								                else R'\u00' if escape == 'x'
 								                else '' if escape == '\n'
 								                else escape)
-												[utils] `js_to_json`: Implement template strings (#6623)

Authored by: Grub4K
											
										
										
											2023-03-25 18:41:28 +00:00
+								    def template_substitute(match):
 								        evaluated = js_to_json(match.group(1), vars, strict=strict)
 								        if evaluated[0] == '"':
 								            return json.loads(evaluated)
 								        return evaluated
-												[patreon] Simplify (#3390)

											
										
										
											2014-08-22 00:33:29 +00:00
+								    def fix_kv(m):
-												[utils] Improve and test js_to_json

											
										
										
											2014-09-30 09:12:59 +00:00
+								        v = m.group(0)
 								        if v in ('true', 'false', 'null'):
 								            return v
-												[SovietsCloset] Add extractor (#884)

Authored by: ChillingPepper
											
										
										
											2021-09-04 12:29:35 +00:00
+								        elif v in ('undefined', 'void 0'):
 								            return 'null'
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        elif v.startswith(('/*', '//', '!')) or v == ',':
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								            return ''
 								        if v[0] in STRING_QUOTES:
-												[utils] `js_to_json`: Implement template strings (#6623)

Authored by: Grub4K
											
										
										
											2023-03-25 18:41:28 +00:00
+								            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
 								            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								            return f'"{escaped}"'
 								        for regex, base in INTEGER_TABLE:
 								            im = re.match(regex, v)
 								            if im:
 								                i = int(im.group(1), base)
 								                return f'"{i}":' if v.endswith(':') else str(i)
 								        if v in vars:
-												[utils] js_to_json: Fix bug in f55523c (#5771)

Authored by: ChillingPepper, pukkandan
											
										
										
											2022-12-30 06:38:38 +00:00
+								            try:
 								                if not strict:
 								                    json.loads(vars[v])
-												[cleanup] Misc

Closes #5576, closes #5887

											
										
										
											2023-01-02 14:09:03 +00:00
+								            except json.JSONDecodeError:
-												[utils] js_to_json: Fix bug in f55523c (#5771)

Authored by: ChillingPepper, pukkandan
											
										
										
											2022-12-30 06:38:38 +00:00
+								                return json.dumps(vars[v])
 								            else:
 								                return vars[v]
-												[utils] Process non-base 10 integers in js_to_json

											
										
										
											2016-05-14 14:39:58 +00:00
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								        if not strict:
 								            return f'"{v}"'
-												[TrovoLive] Add extractor (partially fix #20)

Only VOD extractor has been implemented

Related: https://github.com/ytdl-org/youtube-dl/issues/26125
Related: https://github.com/blackjack4494/yt-dlc/issues/220

											
										
										
											2021-01-19 19:05:50 +00:00
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								        raise ValueError(f'Unknown value: {v}')
-												[patreon] Simplify (#3390)

											
										
										
											2014-08-22 00:33:29 +00:00
-												[extractor/BiliIntl] Fix metadata extraction

Closes #4116

											
										
										
											2022-06-19 21:33:19 +00:00
+								    def create_map(mobj):
 								        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
-												[utils] `js_to_json`: Handle `Array` objects

Authored by: Grub4K, std-move

Co-authored-by: std-move <26625259+std-move@users.noreply.github.com>
Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>

											
										
										
											2023-09-21 21:51:57 +00:00
+								    code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
-												[extractor/BiliIntl] Fix metadata extraction

Closes #4116

											
										
										
											2022-06-19 21:33:19 +00:00
+								    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
-												[jsinterp] Handle new youtube signature functions

Closes #4635

											
										
										
											2022-08-13 23:21:54 +00:00
+								    if not strict:
-												[utils] `js_to_json`: Fix `Date` constructor parsing (#8295)

Authored by: awalgarg, Grub4K
											
										
										
											2023-10-07 23:57:23 +00:00
+								        code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code)
-												[utils] `js_to_json`: Improve

Closes #4900

											
										
										
											2022-09-23 13:51:07 +00:00
+								        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
-												[extractor/txxx] Add extractors (#5240)

Authored by: chio0hai
Closes #5021
											
										
										
											2023-02-03 18:47:00 +00:00
+								        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
 								        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)
-												[tubitv] Fix/improve TV series extraction (#2829)

Authored by: bbepis
											
										
										
											2022-02-19 12:00:51 +00:00
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								    return re.sub(rf'''(?sx)
 								        {STRING_RE}|
 								        {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
-												[SovietsCloset] Add extractor (#884)

Authored by: ChillingPepper
											
										
										
											2021-09-04 12:29:35 +00:00
+								        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
 								        [0-9]+(?={SKIP_RE}:)|
-												Merge 'ytdl-org/youtube-dl/master' release 2020.11.19

Old Extractors left behind:
	VLivePlaylistIE
	YoutubeSearchURLIE
	YoutubeShowIE
	YoutubeFavouritesIE

If removing old extractors, make corresponding changes in
	docs/supportedsites.md
	youtube_dlc/extractor/extractors.py

Not merged:
	.github/ISSUE_TEMPLATE/1_broken_site.md
	.github/ISSUE_TEMPLATE/2_site_support_request.md
	.github/ISSUE_TEMPLATE/3_site_feature_request.md
	.github/ISSUE_TEMPLATE/4_bug_report.md
	.github/ISSUE_TEMPLATE/5_feature_request.md
	test/test_all_urls.py
	youtube_dlc/version.py
	Changelog

											
										
										
											2020-11-19 19:22:59 +00:00
+								        !+
-												[utils] `js_to_json`: Improve escape handling (#5217)

Authored by: Grub4K
											
										
										
											2022-10-12 20:22:17 +00:00
+								        ''', fix_kv, code)
-												[patreon] Simplify (#3390)

											
										
										
											2014-08-22 00:33:29 +00:00
-												[clubic] Add extractor (Fixes #2773)

											
										
										
											2014-04-21 05:12:02 +00:00
def qualities(quality_ids):
    """Build a lookup function that ranks quality identifiers.

    @param quality_ids  Sequence of known quality identifiers.
    @returns            A callable mapping an identifier to its index in
                        `quality_ids`, or -1 if the identifier is not present.
    """
    def q(qid):
        try:
            return quality_ids.index(qid)
        except ValueError:
            # `index` raises ValueError for identifiers that are not listed
            return -1
    return q
-												[YoutubeDL] Do not require default output template to be set

											
										
										
											2014-04-30 08:02:03 +00:00
-												Add pre-processor stage `video`

Related: #456, #5808

											
										
										
											2022-12-30 05:45:41 +00:00
# Names of the stages at which a post-processor can be scheduled to run
POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
-												Allow `--exec` to be run at any post-processing stage

Deprecates `--exec-before-download`

											
										
										
											2022-01-03 11:13:54 +00:00
-												Multiple output templates for different file types

Syntax: -o common_template -o type:type_template
Types supported: subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson

											
										
										
											2021-02-03 13:36:09 +00:00
# Default output templates, keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
 								OUTTMPL_TYPES = {
-												Split video by chapters (#158)


* New options `--split-chapters` and `--no-split-chapters`
* The output/path of the split files can be given using the key `chapter`
* Additional keys `section_title`, `section_number`, `section_start`, `section_end` are available in the output template
* Alias `--split-tracks` for parity with animelover/youtube-dl
* `--sponskrub-cut` and `--split-chapter` cannot work together

Closes:
https://github.com/blackjack4494/yt-dlc/issues/277
https://github.com/ytdl-org/youtube-dl/issues/28438
https://github.com/ytdl-org/youtube-dl/issues/12907
https://github.com/ytdl-org/youtube-dl/issues/6480
https://github.com/ytdl-org/youtube-dl/pull/25005

Rewritten from the implementation by: femaref and Wattux
https://github.com/Wattux/youtube-dl/tree/split-at-timestamps
https://github.com/ytdl-org/youtube-dl/pull/25005
https://github.com/femaref/youtube-dl/tree/split-track

											
										
										
											2021-03-14 23:02:13 +00:00
+								    'chapter': None,
-												Multiple output templates for different file types

Syntax: -o common_template -o type:type_template
Types supported: subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson

											
										
										
											2021-02-03 13:36:09 +00:00
+								    'subtitle': None,
 								    'thumbnail': None,
 								    'description': 'description',
 								    'annotation': 'annotations.xml',
 								    'infojson': 'info.json',
-												[outtmpl] Add type `link` for internet shortcut files
and refactor related code
Closes #1405

											
										
										
											2021-10-26 14:41:59 +00:00
+								    'link': None,
-												Add option `--concat-playlist`

Closes #1855, related: #382

											
										
										
											2022-01-13 11:01:08 +00:00
+								    'pl_video': None,
-												Add `pl_thumbnail` outtmpl key for playlist thumbnails
This should have been implemented in 681de68e9df67f07dde3fbbc6cb2e65a78b2bb16, but I forgot

											
										
										
											2021-05-17 20:10:21 +00:00
+								    'pl_thumbnail': None,
-												Multiple output templates for different file types

Syntax: -o common_template -o type:type_template
Types supported: subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson

											
										
										
											2021-02-03 13:36:09 +00:00
+								    'pl_description': 'description',
 								    'pl_infojson': 'info.json',
 								}
-												Provide compatibility  check_output for 2.6 (Fixes #2926)

											
										
										
											2014-05-16 10:03:59 +00:00
-												Parse metadata from multiple fields

Closes #196

											
										
										
											2021-03-24 22:02:15 +00:00
# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
#
# This is a template for a verbose regex; fill it with `str.format`:
#   {0} - pattern for the mapping key (the text between the parentheses)
#   {1} - pattern for the conversion type
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
'''
-												Add format types `j`, `l`, `q` for outtmpl

Closes #345

											
										
										
											2021-07-29 02:56:17 +00:00
-												[outtmpl] Fix some minor bugs

Closes #7164

											
										
										
											2023-06-20 23:45:03 +00:00
# One character per conversion type of printf-style string formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
-												[facebook] Fix support for untitled videos (Fixes #3757)

											
										
										
											2014-09-15 13:10:24 +00:00
-												Add format types `j`, `l`, `q` for outtmpl

Closes #345

											
										
										
											2021-07-29 02:56:17 +00:00
-												[facebook] Fix support for untitled videos (Fixes #3757)

											
										
										
											2014-09-15 13:10:24 +00:00
def limit_length(s, length):
    """Shorten overly long strings, marking the cut with an ellipsis.

    @param s       The string to shorten; None is passed through unchanged.
    @param length  Maximum length of the returned string.
    @returns       `s` itself if it fits, otherwise a string of at most
                   `length` characters. The result ends in '...' whenever
                   `length` leaves room for it.
    """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    if length < len(ELLIPSES):
        # No room for the ellipsis: a negative slice bound would otherwise
        # produce a result that is *longer* than `length`, so cut hard instead
        return s[:max(length, 0)]
    return s[:length - len(ELLIPSES)] + ELLIPSES
-												[ffmpeg] Warn if ffmpeg/avconv version is too old (Fixes #4026)

											
										
										
											2014-10-26 15:46:34 +00:00
 								def version_tuple(v):
-												[ffmpeg] Improve version check and call it from hls (Fixes #4377)

											
										
										
											2014-12-06 11:14:26 +00:00
+								    return tuple(int(e) for e in re.split(r'[-.]', v))
-												[ffmpeg] Warn if ffmpeg/avconv version is too old (Fixes #4026)

											
										
										
											2014-10-26 15:46:34 +00:00
 								def is_outdated_version(version, limit, assume_new=True):
 								    if not version:
 								        return not assume_new
 								    try:
 								        return version_tuple(version) < version_tuple(limit)
 								    except ValueError:
 								        return not assume_new
-												[utils] Improve update on error message somewhat

We still may want to implement a bulletproof check for the current version, and a better place to add this message so that it works for all kind of other errors too.

											
										
										
											2014-11-20 11:14:28 +00:00
 								def ytdl_is_updateable():
-												Completely change project name to yt-dlp (#85)

* All modules and binary names are changed
* All documentation references changed
* yt-dlp no longer loads youtube-dlc config files
* All URLs changed to point to organization account

Co-authored-by: Pccode66
Co-authored-by: pukkandan
											
										
										
											2021-02-24 18:45:56 +00:00
+								    """ Returns if yt-dlp can be updated with -U """
-												Disable Updates

											
										
										
											2021-01-06 11:58:30 +00:00
-												[cleanup, utils] Split into submodules (#7090)

Closes https://github.com/yt-dlp/yt-dlp/pull/2173

Authored by: pukkandan, coletdjnz
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
											
										
										
											2023-05-20 21:56:23 +00:00
+								    from ..update import is_non_updateable
-												[utils] Improve update on error message somewhat

We still may want to implement a bulletproof check for the current version, and a better place to add this message so that it works for all kind of other errors too.

											
										
										
											2014-11-20 11:14:28 +00:00
-												[build] Allow building with py2exe (and misc fixes)
py2exe config is copied from youtube-dl
Closes #1160

											
										
										
											2021-10-03 20:55:13 +00:00
+								    return not is_non_updateable()
-												Provide guidance when called with a YouTube ID starting with a dash.

Reported at https://news.ycombinator.com/item?id=8648121

											
										
										
											2014-11-23 09:49:19 +00:00
 								def args_to_str(args):
 								    # Get a short string representation for a subprocess command
-												[core] Prevent RCE when using `--exec` with `%q` (CVE-2024-22423)

The shell escape function now properly escapes `%`, `\\` and `\n`. `utils.Popen` as well as `%q` output template expansion have been patched accordingly.

Prior to this fix using `--exec` together with `%q` when on Windows could cause remote code to execute. See https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p for more details.

Authored by: Grub4K

											
										
										
											2024-04-08 21:18:04 +00:00
+								    return shell_quote(args)
-												[soulanime] Fix under Python 3

											
										
										
											2015-01-04 01:20:45 +00:00
-												[cleanup] Misc fixes

Closes https://github.com/yt-dlp/yt-dlp/pull/3213, Closes https://github.com/yt-dlp/yt-dlp/pull/3117

Related: https://github.com/yt-dlp/yt-dlp/issues/3146#issuecomment-1077323114, https://github.com/yt-dlp/yt-dlp/pull/3277#discussion_r841019671, https://github.com/yt-dlp/yt-dlp/commit/a825ffbffa0bea322e3ccb44c6f8e01d8d9572fb#commitcomment-68538986, https://github.com/yt-dlp/yt-dlp/issues/2360, https://github.com/yt-dlp/yt-dlp/commit/5fa3c9a88f597625296981a4a26be723e65d4842#r70393519, https://github.com/yt-dlp/yt-dlp/commit/5fa3c9a88f597625296981a4a26be723e65d4842#r70393254

											
										
										
											2022-03-27 02:20:43 +00:00
def error_to_str(err):
    """Format an exception as '<ExceptionClassName>: <message>'."""
    return f'{type(err).__name__}: {err}'
-												[extractor/wistia] Improve extension detection (#5415)

Closes #5053
Authored by: bashonly, Grub4k, pukkandan
											
										
										
											2022-12-29 16:32:54 +00:00
def mimetype2ext(mt, default=NO_DEFAULT):
    """Map a MIME type to a file extension.

    @param mt       The MIME type; parameters after ';' are ignored and the
                    comparison is case-insensitive.
    @param default  Value returned when `mt` is not a string or is not in
                    the table.
    @returns        The mapped extension. Without `default`, a non-string
                    `mt` gives None and an unknown type gives its subtype
                    with '+' replaced by '.'.
    """
    if not isinstance(mt, str):
        if default is not NO_DEFAULT:
            return default
        return None

    MAP = {
        # video
        '3gpp': '3gp',
        'mp2t': 'ts',
        'mp4': 'mp4',
        'mpeg': 'mpeg',
        'mpegurl': 'm3u8',
        'quicktime': 'mov',
        'webm': 'webm',
        'vp9': 'vp9',
        'video/ogg': 'ogv',
        'x-flv': 'flv',
        'x-m4v': 'm4v',
        'x-matroska': 'mkv',
        'x-mng': 'mng',
        'x-mp4-fragmented': 'mp4',
        'x-ms-asf': 'asf',
        'x-ms-wmv': 'wmv',
        'x-msvideo': 'avi',

        # application (streaming playlists)
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.apple.mpegurl': 'm3u8',
        'vnd.ms-sstr+xml': 'ism',
        'x-mpegurl': 'm3u8',

        # audio
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
        # Using .mp3 as it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/webm': 'webm',
        'audio/x-matroska': 'mka',
        'audio/x-mpegurl': 'm3u',
        'midi': 'mid',
        'ogg': 'ogg',
        'wav': 'wav',
        'wave': 'wav',
        'x-aac': 'aac',
        'x-flac': 'flac',
        'x-m4a': 'm4a',
        'x-realaudio': 'ra',
        'x-wav': 'wav',

        # image
        'avif': 'avif',
        'bmp': 'bmp',
        'gif': 'gif',
        'jpeg': 'jpg',
        'png': 'png',
        'svg+xml': 'svg',
        'tiff': 'tif',
        'vnd.wap.wbmp': 'wbmp',
        'webp': 'webp',
        'x-icon': 'ico',
        'x-jng': 'jng',
        'x-ms-bmp': 'bmp',

        # caption
        'filmstrip+json': 'fs',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-ms-sami': 'sami',

        # misc
        'gzip': 'gz',
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
    }

    mimetype = mt.partition(';')[0].strip().lower()
    _, _, subtype = mimetype.rpartition('/')

    # Try the most specific key first: the full type, then the subtype,
    # then whatever follows the last '+' of the subtype (e.g. 'xml')
    ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
    if ext:
        return ext
    elif default is not NO_DEFAULT:
        return default
    return subtype.replace('+', '.')
-												[sandia] Add new extractor (#4974)

											
										
										
											2015-02-18 23:31:01 +00:00
-												[skeb] Add extractor (#1916)

Fixes: https://github.com/ytdl-org/youtube-dl/issues/30287
Authored by: nao20010128nao
											
										
										
											2021-12-09 11:40:52 +00:00
def ext2mimetype(ext_or_url):
    """Guess the MIME type for a file extension, filename or URL.

    @param ext_or_url  A bare extension (without a dot) or anything
                       containing a dot, such as a filename or URL.
    @returns           The type reported by `mimetypes.guess_type`, or None
                       if the input is empty or the type is unknown.
    """
    if not ext_or_url:
        return None
    if '.' not in ext_or_url:
        # A bare extension: wrap it in a dummy filename for `guess_type`
        ext_or_url = f'file.{ext_or_url}'
    return mimetypes.guess_type(ext_or_url)[0]
-												[utils] add helper function for parsing codecs

											
										
										
											2016-03-16 17:48:06 +00:00
def parse_codecs(codecs_str):
    """Split a comma-separated codecs string into video/audio/subtitle codecs.

    See http://tools.ietf.org/html/rfc6381

    @param codecs_str  The codecs string, e.g. 'avc1.64001f, mp4a.40.2'.
    @returns           {} if the input is empty or nothing could be
                       determined. Otherwise a dict with 'vcodec' and
                       'acodec' ('none' when absent), 'dynamic_range'
                       (None, 'DV' or 'HDR10') and, only if a subtitle codec
                       was found, 'scodec'. If no codec is recognised but
                       exactly two are given, they are assumed to be the
                       video and audio codec, in that order.
    """
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec, scodec, hdr = None, None, None, None
    for full_codec in split_codecs:
        # Lowercase only the part before the first '.'; the rest is kept as given
        full_codec = re.sub(r'^([^.]+)', lambda m: m.group(1).lower(), full_codec)
        # Drop zeros that directly precede a digit so that e.g. 'vp09.02'
        # is matched as 'vp9.2'. Only used for matching; the reported codec
        # keeps its zeros
        parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
        if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                        'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
            if vcodec:
                # Only the first video codec counts
                continue
            vcodec = full_codec
            if parts[0] in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
                hdr = 'HDR10'
            elif parts[:2] == ['vp9', '2']:
                hdr = 'HDR10'
        elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
                          'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        elif parts[0] in ('stpp', 'wvtt'):
            scodec = scodec or full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')
    if vcodec or acodec or scodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
            **({'scodec': scodec} if scodec is not None else {}),
        }
    elif len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
-												Determine merge container better (See desc) (#1482)

* Determine the container early. Closes #4069
* Use codecs instead of just file extensions
* Obey `--prefer-free-formats`
* Allow fallbacks in `--merge-output`

Authored by: pukkandan, selfisekai
											
										
										
											2022-08-04 00:42:12 +00:00
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Choose a container extension able to hold the given video/audio streams.

    @param vcodecs      Codec strings of the video streams
    @param acodecs      Codec strings of the audio streams
    @param vexts        Extensions of the video streams (same length as vcodecs)
    @param aexts        Extensions of the audio streams (same length as acodecs)
    @param preferences  Optional ordered sequence of acceptable extensions
    @returns            The chosen extension. Falls back to 'mkv' when that is
                        allowed, otherwise to the last entry of preferences
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
    # mkv is allowed when nothing was requested, or when it was requested explicitly
    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a', 'ac-4',  # fourcc (m3u8, mpd)
            'h264', 'aacl', 'ec-3',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    # Normalise the first codec of a list: keep the part before the first '.',
    # drop every '0' character and lowercase it.
    # NOTE(review): relies on try_get (defined elsewhere) - presumably yields None
    # when the getter fails, e.g. for an empty list; confirm
    sanitize_codec = functools.partial(
        try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
    vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

    # First pass: decide by codec compatibility
    for ext in preferences or COMPATIBLE_CODECS:
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    # Second pass: decide by families of extensions
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm', 'weba'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]
-												[extractor/wistia] Improve extension detection (#5415)

Closes #5053
Authored by: bashonly, Grub4k, pukkandan
											
										
										
											2022-12-29 16:32:54 +00:00
def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
    """Guess a file extension from the headers of a response handle.

    Checks, in order: the filename in a Content-Disposition attachment header,
    the text after the last '.' of the 'x-amz-meta-name' header, and finally
    the Content-Type header (via mimetype2ext, to which `default` is passed).
    """
    getheader = url_handle.headers.get

    cd = getheader('Content-Disposition')
    if cd:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
        if m:
            e = determine_ext(m.group('filename'), default_ext=None)
            if e:
                return e

    meta_ext = getheader('x-amz-meta-name')
    if meta_ext:
        # rpartition returns the whole string as the last element when there is no '.'
        e = meta_ext.rpartition('.')[2]
        if e:
            return e

    return mimetype2ext(getheader('Content-Type'), default=default)
-												Respect age_limit when listing extractors (Fixes #4653)

											
										
										
											2015-01-07 06:20:20 +00:00
-												[letv] Fix extraction

Using data URIs for passing the decrypted M3U8 manifest, which is
supported by ffmpeg only.

											
										
										
											2015-07-22 12:03:05 +00:00
def encode_data_uri(data, mime_type):
    """Return `data` (bytes) as a base64 'data:' URI with the given MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return f'data:{mime_type};base64,{payload}'
-												[letv] Fix extraction

Using data URIs for passing the decrypted M3U8 manifest, which is
supported by ffmpeg only.

											
										
										
											2015-07-22 12:03:05 +00:00
-												Respect age_limit when listing extractors (Fixes #4653)

											
										
										
											2015-01-07 06:20:20 +00:00
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    @param content_limit  Age limit of the content; None means available for everyone
    @param age_limit      Age limit configured by the user; None means no limit set
    """
    if age_limit is None or content_limit is None:
        # Either no limit is set, or the content is available for everyone
        return False
    return age_limit < content_limit
-												[generic] Add support for BOMs (Fixes #4753)

											
										
										
											2015-01-23 00:21:30 +00:00
-												Fix a904a7f8c6edc42046f0a78fb279739d500d4887

											
										
										
											2022-07-15 16:14:07 +00:00
# List of known byte-order-marks (BOM)
# The 4-byte UTF-32 LE mark is listed before the 2-byte UTF-16 LE mark
# because the latter is a prefix of the former
BOMS = [
    (b'\xef\xbb\xbf', 'utf-8'),
    (b'\x00\x00\xfe\xff', 'utf-32-be'),
    (b'\xff\xfe\x00\x00', 'utf-32-le'),
    (b'\xff\xfe', 'utf-16-le'),
    (b'\xfe\xff', 'utf-16-be'),
]


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Leading BOMs (including repeated ones) are stripped and select the encoding
    used for decoding; UTF-8 is assumed when no BOM is present.
    Returns the match object (truthy) when the decoded text starts with
    optional whitespace followed by '<', otherwise None.
    """
    encoding = 'utf-8'
    for bom, enc in BOMS:
        # `while` rather than `if`: some files carry the same BOM more than once
        while first_bytes.startswith(bom):
            encoding, first_bytes = enc, first_bytes[len(bom):]

    return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
-												[downloader] Improve downloader selection

											
										
										
											2015-01-23 22:50:31 +00:00
 								def determine_protocol(info_dict):
 								    protocol = info_dict.get('protocol')
 								    if protocol is not None:
 								        return protocol
-												[utils] Sanitize URL when determining protocol
Closes #1406

											
										
										
											2021-10-26 14:01:56 +00:00
+								    url = sanitize_url(info_dict['url'])
-												[downloader] Improve downloader selection

											
										
										
											2015-01-23 22:50:31 +00:00
+								    if url.startswith('rtmp'):
 								        return 'rtmp'
 								    elif url.startswith('mms'):
 								        return 'mms'
 								    elif url.startswith('rtsp'):
 								        return 'rtsp'
 								    ext = determine_ext(url)
 								    if ext == 'm3u8':
-												[cleanup] Misc

											
										
										
											2022-09-09 22:16:54 +00:00
+								        return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
-												[downloader] Improve downloader selection

											
										
										
											2015-01-23 22:50:31 +00:00
+								    elif ext == 'f4m':
 								        return 'f4m'
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    return urllib.parse.urlparse(url).scheme
-												Add --list-thumbnails

											
										
										
											2015-01-25 01:38:47 +00:00
-												[utils] Allow alignment in `render_table`
and add tests

											
										
										
											2021-11-20 03:03:51 +00:00
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a tab character will be right aligned

    @param header_row  Row of column titles
    @param data        List of rows
    @param delim       If truthy, string repeated to form a separator row under the header
    @param extra_gap   Additional spaces between columns (one space is always added)
    @param hide_empty  Drop columns whose data cells all have zero width
    @returns           The table as a single newline-joined string
    """
    def width(string):
        # Visible width: terminal sequences and the alignment tab do not count
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filter_array):
        # Keep a cell when its filter entry is truthy; cells beyond the end of
        # filter_array are kept (fillvalue=True)
        return [col for take, col in itertools.zip_longest(filter_array, row, fillvalue=True) if take]

    # With hide_empty the column widths of the data act as the keep-filter;
    # otherwise the empty filter keeps everything
    max_lens = get_max_lens(data) if hide_empty else []
    header_row = filter_using_list(header_row, max_lens)
    data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row, *data]
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        table = [header_row, [delim * (ml + extra_gap) for ml in max_lens], *data]
        table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter

    for row in table:
        for pos, text in enumerate(map(str, row)):
            padding = max_lens[pos] - width(text)
            if '\t' in text:
                # The tab marks where the padding goes, right-aligning what follows it
                row[pos] = text.replace('\t', ' ' * padding) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (padding + extra_gap)
    return '\n'.join(''.join(row).rstrip() for row in table)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
-												Let `--match-filter` reject entries early
Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`

											
										
										
											2021-08-15 08:12:23 +00:00
+								def _match_one(filter_part, dct, incomplete):
-												Add all format filtering operators also to `--match-filter`

PR: https://github.com/ytdl-org/youtube-dl/pull/27361

Authored by: max-te

											
										
										
											2021-06-13 14:25:19 +00:00
+								    # TODO: Generalize code with YoutubeDL._build_format_filter
-												Add regex to `--match-filter`

This does not fully deprecate `--match-title`/`--reject-title`
since `--match-filter` is only checked after the extraction is complete,
while `--match-title` can often be checked from the flat playlist.

Fixes: https://github.com/ytdl-org/youtube-dl/issues/9092, https://github.com/ytdl-org/youtube-dl/issues/23035

											
										
										
											2021-08-04 21:31:23 +00:00
+								    STRING_OPERATORS = {
 								        '*=': operator.contains,
 								        '^=': lambda attr, value: attr.startswith(value),
 								        '$=': lambda attr, value: attr.endswith(value),
 								        '~=': lambda attr, value: re.search(value, attr),
 								    }
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								    COMPARISON_OPERATORS = {
-												Add regex to `--match-filter`

This does not fully deprecate `--match-title`/`--reject-title`
since `--match-filter` is only checked after the extraction is complete,
while `--match-title` can often be checked from the flat playlist.

Fixes: https://github.com/ytdl-org/youtube-dl/issues/9092, https://github.com/ytdl-org/youtube-dl/issues/23035

											
										
										
											2021-08-04 21:31:23 +00:00
+								        **STRING_OPERATORS,
 								        '<=': operator.le,  # "<=" must be defined above "<"
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								        '<': operator.lt,
 								        '>=': operator.ge,
-												Add regex to `--match-filter`

This does not fully deprecate `--match-title`/`--reject-title`
since `--match-filter` is only checked after the extraction is complete,
while `--match-title` can often be checked from the flat playlist.

Fixes: https://github.com/ytdl-org/youtube-dl/issues/9092, https://github.com/ytdl-org/youtube-dl/issues/23035

											
										
										
											2021-08-04 21:31:23 +00:00
+								        '>': operator.gt,
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								        '=': operator.eq,
 								    }
-												Add regex to `--match-filter`

This does not fully deprecate `--match-title`/`--reject-title`
since `--match-filter` is only checked after the extraction is complete,
while `--match-title` can often be checked from the flat playlist.

Fixes: https://github.com/ytdl-org/youtube-dl/issues/9092, https://github.com/ytdl-org/youtube-dl/issues/23035

											
										
										
											2021-08-04 21:31:23 +00:00
-												Ignore format-specific fields in initial pass of `--match-filter`

Closes #3074

											
										
										
											2022-03-25 08:36:46 +00:00
+								    if isinstance(incomplete, bool):
 								        is_incomplete = lambda _: incomplete
 								    else:
 								        is_incomplete = lambda k: k in incomplete
-												[cleanup] Misc fixes (see desc)

* [tvver] Fix bug in 6837633a4a614920b6e43ffc6b4b8590dca8c9d7 - Closes #4054
* [rumble] Fix tests - Closes #3976
* [make] Remove `cat` abuse - Closes #3989
* [make] Revert #3684 - Closes #3814
* [utils] Improve `get_elements_by_class` - Closes #3993
* [utils] Inherit `Namespace` from `types.SimpleNamespace`
* [utils] Use `re.fullmatch` for matching filters
* [jsinterp] Handle quotes in `_separate`
* [make_readme] Allow overshooting last line

Authored by: pukkandan, kwconder, MrRawes, Lesmiscore

											
										
										
											2022-05-25 12:23:46 +00:00
+								    operator_rex = re.compile(r'''(?x)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								        (?P<key>[a-z_]+)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        \s*(?P<negation>!\s*)?(?P<op>{})(?P<none_inclusive>\s*\?)?\s*
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								        (?:
-												Add regex to `--match-filter`

This does not fully deprecate `--match-title`/`--reject-title`
since `--match-filter` is only checked after the extraction is complete,
while `--match-title` can often be checked from the flat playlist.

Fixes: https://github.com/ytdl-org/youtube-dl/issues/9092, https://github.com/ytdl-org/youtube-dl/issues/23035

											
										
										
											2021-08-04 21:31:23 +00:00
+								            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
 								            (?P<strval>.+?)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								        )
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        '''.format('|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))))
-												[cleanup] Misc fixes (see desc)

* [tvver] Fix bug in 6837633a4a614920b6e43ffc6b4b8590dca8c9d7 - Closes #4054
* [rumble] Fix tests - Closes #3976
* [make] Remove `cat` abuse - Closes #3989
* [make] Revert #3684 - Closes #3814
* [utils] Improve `get_elements_by_class` - Closes #3993
* [utils] Inherit `Namespace` from `types.SimpleNamespace`
* [utils] Use `re.fullmatch` for matching filters
* [jsinterp] Handle quotes in `_separate`
* [make_readme] Allow overshooting last line

Authored by: pukkandan, kwconder, MrRawes, Lesmiscore

											
										
										
											2022-05-25 12:23:46 +00:00
+								    m = operator_rex.fullmatch(filter_part.strip())
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								    if m:
-												[utils] Allow duration strings in filter
Closes #1309

											
										
										
											2021-10-16 19:34:00 +00:00
+								        m = m.groupdict()
 								        unnegated_op = COMPARISON_OPERATORS[m['op']]
 								        if m['negation']:
-												Add all format filtering operators also to `--match-filter`

PR: https://github.com/ytdl-org/youtube-dl/pull/27361

Authored by: max-te

											
										
										
											2021-06-13 14:25:19 +00:00
+								            op = lambda attr, value: not unnegated_op(attr, value)
 								        else:
 								            op = unnegated_op
-												[utils] Allow duration strings in filter
Closes #1309

											
										
										
											2021-10-16 19:34:00 +00:00
+								        comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
 								        if m['quote']:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            comparison_value = comparison_value.replace(r'\{}'.format(m['quote']), m['quote'])
-												[utils] Allow duration strings in filter
Closes #1309

											
										
										
											2021-10-16 19:34:00 +00:00
+								        actual_value = dct.get(m['key'])
 								        numeric_comparison = None
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								        if isinstance(actual_value, (int, float)):
-												[utils] Fix --match-filter for int-like strings (closes #11082)

											
										
										
											2016-10-31 16:32:08 +00:00
+								            # If the original field is a string and matching comparisonvalue is
 								            # a number we should respect the origin of the original field
 								            # and process comparison value as a string (see
-												[utils] Allow duration strings in filter
Closes #1309

											
										
										
											2021-10-16 19:34:00 +00:00
+								            # https://github.com/ytdl-org/youtube-dl/issues/11082)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								            try:
-												[utils] Allow duration strings in filter
Closes #1309

											
										
										
											2021-10-16 19:34:00 +00:00
+								                numeric_comparison = int(comparison_value)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								            except ValueError:
-												[utils] Allow duration strings in filter
Closes #1309

											
										
										
											2021-10-16 19:34:00 +00:00
+								                numeric_comparison = parse_filesize(comparison_value)
 								                if numeric_comparison is None:
 								                    numeric_comparison = parse_filesize(f'{comparison_value}B')
 								                if numeric_comparison is None:
 								                    numeric_comparison = parse_duration(comparison_value)
 								        if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            raise ValueError('Operator {} only supports string values!'.format(m['op']))
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								        if actual_value is None:
-												Ignore format-specific fields in initial pass of `--match-filter`

Closes #3074

											
										
										
											2022-03-25 08:36:46 +00:00
+								            return is_incomplete(m['key']) or m['none_inclusive']
-												[utils] Allow duration strings in filter
Closes #1309

											
										
										
											2021-10-16 19:34:00 +00:00
+								        return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
 								    UNARY_OPERATORS = {
-												[utils] Fix match_str for boolean meta fields

											
										
										
											2018-04-24 16:49:30 +00:00
+								        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
 								        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								    }
-												[cleanup] Misc fixes (see desc)

* [tvver] Fix bug in 6837633a4a614920b6e43ffc6b4b8590dca8c9d7 - Closes #4054
* [rumble] Fix tests - Closes #3976
* [make] Remove `cat` abuse - Closes #3989
* [make] Revert #3684 - Closes #3814
* [utils] Improve `get_elements_by_class` - Closes #3993
* [utils] Inherit `Namespace` from `types.SimpleNamespace`
* [utils] Use `re.fullmatch` for matching filters
* [jsinterp] Handle quotes in `_separate`
* [make_readme] Allow overshooting last line

Authored by: pukkandan, kwconder, MrRawes, Lesmiscore

											
										
										
											2022-05-25 12:23:46 +00:00
+								    operator_rex = re.compile(r'''(?x)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        (?P<op>{})\s*(?P<key>[a-z_]+)
 								        '''.format('|'.join(map(re.escape, UNARY_OPERATORS.keys()))))
-												[cleanup] Misc fixes (see desc)

* [tvver] Fix bug in 6837633a4a614920b6e43ffc6b4b8590dca8c9d7 - Closes #4054
* [rumble] Fix tests - Closes #3976
* [make] Remove `cat` abuse - Closes #3989
* [make] Revert #3684 - Closes #3814
* [utils] Improve `get_elements_by_class` - Closes #3993
* [utils] Inherit `Namespace` from `types.SimpleNamespace`
* [utils] Use `re.fullmatch` for matching filters
* [jsinterp] Handle quotes in `_separate`
* [make_readme] Allow overshooting last line

Authored by: pukkandan, kwconder, MrRawes, Lesmiscore

											
										
										
											2022-05-25 12:23:46 +00:00
+								    m = operator_rex.fullmatch(filter_part.strip())
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								    if m:
 								        op = UNARY_OPERATORS[m.group('op')]
 								        actual_value = dct.get(m.group('key'))
-												Ignore format-specific fields in initial pass of `--match-filter`

Closes #3074

											
										
										
											2022-03-25 08:36:46 +00:00
+								        if is_incomplete(m.group('key')) and actual_value is None:
-												Let `--match-filter` reject entries early
Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`

											
										
										
											2021-08-15 08:12:23 +00:00
+								            return True
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
+								        return op(actual_value)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    raise ValueError(f'Invalid filter part {filter_part!r}')
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
-												Let `--match-filter` reject entries early
Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`

											
										
										
											2021-08-15 08:12:23 +00:00
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.
    @returns           Whether the filter passes
    @param filter_str  Filter expressions joined by "&"; a literal "&" inside
                       an expression is written as "\\&"
    @param dct         The dictionary to test
    @param incomplete  Set of keys that is expected to be missing from dct.
                       Can be True/False to indicate all/none of the keys may be missing.
                       All conditions on incomplete keys pass if the key is missing
    """
    # Split on "&" that is not preceded by a backslash, then unescape each part
    filter_parts = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
        for filter_part in filter_parts)
-												[YoutubeDL] Add generic video filtering (Fixes #4916)

This functionality is intended to eventually encompass the current format filtering.

											
										
										
											2015-02-10 02:32:21 +00:00
-												Add option `--break-match-filters`

* Deprecates `--break-on-reject`

Closes #5962

											
										
										
											2023-03-03 19:43:05 +00:00
def match_filter_func(filters, breaking_filters=None):
    """Build a callable that checks an info dict against `match_str` filters.

    @param filters           Filter string(s); passed through `variadic` and
                             de-duplicated into a set. The special entry `-`
                             enables interactive mode
    @param breaking_filters  Filter string(s) handled by a nested
                             `match_filter_func`; a rejection by these raises
                             `RejectedVideoReached` instead of being returned
    @returns                 None if neither argument contains anything,
                             otherwise a function `(info_dict, incomplete=False)`
                             that returns
                               * None if the entry passes,
                               * NO_DEFAULT if it passes, interactive mode is on
                                 and `incomplete` is falsy,
                               * a message string if no filter matches
    """
    if not filters and not breaking_filters:
        return None
    # Build the repr from the arguments as given, before they are normalised below
    repr_ = f'{match_filter_func.__module__}.{match_filter_func.__qualname__}({filters}, {breaking_filters})'

    # Without breaking filters, fall back to a stub that never rejects
    breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
    filters = set(variadic(filters or []))

    interactive = '-' in filters
    if interactive:
        filters.remove('-')

    @function_with_repr.set_repr(repr_)
    def _match_func(info_dict, incomplete=False):
        ret = breaking_filters(info_dict, incomplete)
        if ret is not None:
            raise RejectedVideoReached(ret)

        # Multiple filters are OR-ed: a single passing filter is enough
        if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
            return NO_DEFAULT if interactive and not incomplete else None

        video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
        filter_str = ') | ('.join(map(str.strip, filters))
        return f'{video_title} does not pass filter ({filter_str}), skipping ..'
    return _match_func
-												[letv] Add --cn-verification-proxy (Closes #5077)

											
										
										
											2015-03-02 23:03:06 +00:00
-												[cleanup] Misc cleanup

											
										
										
											2022-07-08 19:37:47 +00:00
class download_range_func:
    """Callable that yields the sections of a video which should be downloaded.

    @param chapters   Regexes that are searched for in the chapter titles (may be None)
    @param ranges     (start, end) pairs (may be None). A negative value is
                      offset from `info_dict['duration']` when that is available
    @param from_info  Whether to also yield the `start_time`/`end_time`
                      found in the info dict itself
    """

    def __init__(self, chapters, ranges, from_info=False):
        self.chapters, self.ranges, self.from_info = chapters, ranges, from_info

    def __call__(self, info_dict, ydl):
        """Generate one dict per section to download.

        @param info_dict  Info dict; `chapters`, `duration`, `start_time`,
                          `end_time` and `id` are read from it
        @param ydl        Object whose `to_screen` is used to report that
                          no chapter matched
        @yields           Matching chapters (with their `index` added), then
                          a `start_time`/`end_time` dict per range, then the
                          range taken from the info dict. An empty dict is
                          yielded when nothing at all was requested
        """
        # Assume failure up front; cleared as soon as any chapter matches
        warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
                   else 'Cannot match chapters since chapter information is unavailable')
        for regex in self.chapters or []:
            for i, chapter in enumerate(info_dict.get('chapters') or []):
                if re.search(regex, chapter['title']):
                    warning = None
                    yield {**chapter, 'index': i}
        if self.chapters and warning:
            ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

        for start, end in self.ranges or []:
            yield {
                'start_time': self._handle_negative_timestamp(start, info_dict),
                'end_time': self._handle_negative_timestamp(end, info_dict),
            }

        if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
            yield {
                'start_time': info_dict.get('start_time') or 0,
                'end_time': info_dict.get('end_time') or float('inf'),
            }
        elif not self.ranges and not self.chapters:
            # Nothing was requested: an empty section dict is yielded
            yield {}

    @staticmethod
    def _handle_negative_timestamp(time, info):
        """Turn a negative timestamp into one relative to the duration, floored at 0."""
        return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time

    def __eq__(self, other):
        # from_info changes what __call__ yields, so it is part of the identity
        return (isinstance(other, download_range_func)
                and self.chapters == other.chapters and self.ranges == other.ranges
                and self.from_info == other.from_info)

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.chapters}, {self.ranges}, {self.from_info})'
-												[cleanup] Misc

											
										
										
											2022-11-30 06:04:51 +00:00
-												Add option `--download-sections` to download video partially

Closes #52, Closes #3932

											
										
										
											2022-06-06 20:13:50 +00:00
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into a number of seconds.

    Two forms are recognised:
      * a number matching NUMBER_RE, optionally followed by `s`
      * `H:MM:SS` with an optional fraction introduced by `.` or `:`

    @param time_expr  The time expression string (may be empty or None)
    @returns          The time as a float, or None if the expression is
                      empty or matches neither form
    """
    if not time_expr:
        return None

    mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, seconds = mobj.groups()
        # A `:` before the fraction is treated like a decimal point
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))

    return None
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
-												[utils,common] Merge format_srt_time and _subtitles_timecode

format_srt_time uses a comma as the delimiter between seconds and
milliseconds while _subtitles_timecode uses a dot. All .srt examples I
found on the Internet uses a comma, so I use a comma in the merged
version. See http://matroska.org/technical/specs/subtitles/srt.html and
http://devel.aegisub.org/wiki/SubtitleFormats/SRT

											
										
										
											2015-05-12 05:04:54 +00:00
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode (`HH:MM:SS,mmm`).

    @param seconds  The time in seconds
    @returns        The timecode string, with a comma before the milliseconds
    """
    return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
 								def ass_subtitles_timecode(seconds):
 								    time = timetuple_from_msec(seconds * 1000)
 								    return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
 								def dfxp2srt(dfxp_data):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    """
-												[utils] Use bytes-like objects in dfxp2srt

This fixes handling of non-UTF8 TTML subtitles

Closes #14191

											
										
										
											2017-09-16 04:18:38 +00:00
+								    @param dfxp_data A bytes-like object containing DFXP data
 								    @returns A unicode object containing converted SRT data
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    """
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								    LEGACY_NAMESPACES = (
-												[utils] Use bytes-like objects in dfxp2srt

This fixes handling of non-UTF8 TTML subtitles

Closes #14191

											
										
										
											2017-09-16 04:18:38 +00:00
+								        (b'http://www.w3.org/ns/ttml', [
 								            b'http://www.w3.org/2004/11/ttaf1',
 								            b'http://www.w3.org/2006/04/ttaf1',
 								            b'http://www.w3.org/2006/10/ttaf1',
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								        ]),
-												[utils] Use bytes-like objects in dfxp2srt

This fixes handling of non-UTF8 TTML subtitles

Closes #14191

											
										
										
											2017-09-16 04:18:38 +00:00
+								        (b'http://www.w3.org/ns/ttml#styling', [
 								            b'http://www.w3.org/ns/ttml#style',
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								        ]),
 								    )
 								    SUPPORTED_STYLING = [
 								        'color',
 								        'fontFamily',
 								        'fontSize',
 								        'fontStyle',
 								        'fontWeight',
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        'textDecoration',
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								    ]
-												[utils] Support ttaf1 namespace in TTML

It's found in bbc.co.uk. See #6038

											
										
										
											2015-06-21 11:16:59 +00:00
+								    _x = functools.partial(xpath_with_ns, ns_map={
-												[utils] fix style id extraction for namespaced id attribute(closes #16551)

											
										
										
											2018-05-26 13:35:47 +00:00
+								        'xml': 'http://www.w3.org/XML/1998/namespace',
-												[utils] Support ttaf1 namespace in TTML

It's found in bbc.co.uk. See #6038

											
										
										
											2015-06-21 11:16:59 +00:00
+								        'ttml': 'http://www.w3.org/ns/ttml',
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								        'tts': 'http://www.w3.org/ns/ttml#styling',
-												[utils] Support ttaf1 namespace in TTML

It's found in bbc.co.uk. See #6038

											
										
										
											2015-06-21 11:16:59 +00:00
+								    })
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								    styles = {}
 								    default_style = {}
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								    class TTMLPElementParser:
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								        _out = ''
 								        _unclosed_elements = []
 								        _applied_styles = []
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											2016-01-28 11:38:34 +00:00
+								        def start(self, tag, attrib):
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								            if tag in (_x('ttml:br'), 'br'):
 								                self._out += '\n'
 								            else:
 								                unclosed_elements = []
 								                style = {}
 								                element_style_id = attrib.get('style')
 								                if default_style:
 								                    style.update(default_style)
 								                if element_style_id:
 								                    style.update(styles.get(element_style_id, {}))
 								                for prop in SUPPORTED_STYLING:
 								                    prop_val = attrib.get(_x('tts:' + prop))
 								                    if prop_val:
 								                        style[prop] = prop_val
 								                if style:
 								                    font = ''
 								                    for k, v in sorted(style.items()):
 								                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
 								                            continue
 								                        if k == 'color':
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                            font += f' color="{v}"'
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								                        elif k == 'fontSize':
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                            font += f' size="{v}"'
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								                        elif k == 'fontFamily':
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                            font += f' face="{v}"'
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								                        elif k == 'fontWeight' and v == 'bold':
 								                            self._out += '<b>'
 								                            unclosed_elements.append('b')
 								                        elif k == 'fontStyle' and v == 'italic':
 								                            self._out += '<i>'
 								                            unclosed_elements.append('i')
 								                        elif k == 'textDecoration' and v == 'underline':
 								                            self._out += '<u>'
 								                            unclosed_elements.append('u')
 								                    if font:
 								                        self._out += '<font' + font + '>'
 								                        unclosed_elements.append('font')
 								                    applied_style = {}
 								                    if self._applied_styles:
 								                        applied_style.update(self._applied_styles[-1])
 								                    applied_style.update(style)
 								                    self._applied_styles.append(applied_style)
 								                self._unclosed_elements.append(unclosed_elements)
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											2016-01-28 11:38:34 +00:00
+								        def end(self, tag):
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								            if tag not in (_x('ttml:br'), 'br'):
 								                unclosed_elements = self._unclosed_elements.pop()
 								                for element in reversed(unclosed_elements):
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                    self._out += f'</{element}>'
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								                if unclosed_elements and self._applied_styles:
 								                    self._applied_styles.pop()
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											2016-01-28 11:38:34 +00:00
+								        def data(self, data):
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								            self._out += data
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											2016-01-28 11:38:34 +00:00
 								        def close(self):
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								            return self._out.strip()
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											2016-01-28 11:38:34 +00:00
-												[extractor/sbs] Overhaul extractor for new API (#6839)

Closes #6543
Authored by: vidiot720, dirkf, bashonly
											
										
										
											2023-04-18 23:46:57 +00:00
+								    # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
 								    # This will not trigger false positives since only UTF-8 text is being replaced
 								    dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
-												[utils] fix dfxp2srt text extraction(fixes #8055)

											
										
										
											2016-01-28 11:38:34 +00:00
+								    def parse_node(node):
 								        target = TTMLPElementParser()
 								        parser = xml.etree.ElementTree.XMLParser(target=target)
 								        parser.feed(xml.etree.ElementTree.tostring(node))
 								        return parser.close()
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								    for k, v in LEGACY_NAMESPACES:
 								        for ns in v:
 								            dfxp_data = dfxp_data.replace(ns, k)
-												[utils] Use bytes-like objects in dfxp2srt

This fixes handling of non-UTF8 TTML subtitles

Closes #14191

											
										
										
											2017-09-16 04:18:38 +00:00
+								    dfxp = compat_etree_fromstring(dfxp_data)
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
+								    out = []
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
-												[utils] Support TTML without default namespace

In a strict sense such TTML is invalid, but Yahoo uses it.

											
										
										
											2015-05-18 16:45:01 +00:00
 								    if not paras:
 								        raise ValueError('Invalid dfxp/TTML subtitle')
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								    repeat = False
 								    while True:
 								        for style in dfxp.findall(_x('.//ttml:style')):
-												[utils] fix style id extraction for namespaced id attribute(closes #16551)

											
										
										
											2018-05-26 13:35:47 +00:00
+								            style_id = style.get('id') or style.get(_x('xml:id'))
 								            if not style_id:
 								                continue
-												[utils] add support for ttml styles

											
										
										
											2017-02-23 17:46:20 +00:00
+								            parent_style_id = style.get('style')
 								            if parent_style_id:
 								                if parent_style_id not in styles:
 								                    repeat = True
 								                    continue
 								                styles[style_id] = styles[parent_style_id].copy()
 								            for prop in SUPPORTED_STYLING:
 								                prop_val = style.get(_x('tts:' + prop))
 								                if prop_val:
 								                    styles.setdefault(style_id, {})[prop] = prop_val
 								        if repeat:
 								            repeat = False
 								        else:
 								            break
 								    for p in ('body', 'div'):
 								        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
 								        if ele is None:
 								            continue
 								        style = styles.get(ele.get('style'))
 								        if not style:
 								            continue
 								        default_style.update(style)
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
+								    for para, index in zip(paras, itertools.count(1)):
-												[utils] Fix TTML conversion

Tolerate invalid timestamps (closes #7909)

											
										
										
											2015-12-19 10:21:42 +00:00
+								        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
-												[utils] Support 'dur' field in TTML

											
										
										
											2015-05-12 04:47:37 +00:00
+								        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
-												[utils] Fix TTML conversion

Tolerate invalid timestamps (closes #7909)

											
										
										
											2015-12-19 10:21:42 +00:00
+								        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
 								        if begin_time is None:
 								            continue
-												[utils] Support 'dur' field in TTML

											
										
										
											2015-05-12 04:47:37 +00:00
+								        if not end_time:
-												[utils] Fix TTML conversion

Tolerate invalid timestamps (closes #7909)

											
										
										
											2015-12-19 10:21:42 +00:00
+								            if not dur:
 								                continue
 								            end_time = begin_time + dur
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
+								        out.append('%d\n%s --> %s\n%s\n\n' % (
 								            index,
-												[utils,common] Merge format_srt_time and _subtitles_timecode

format_srt_time uses a comma as the delimiter between seconds and
milliseconds while _subtitles_timecode uses a dot. All .srt examples I
found on the Internet uses a comma, so I use a comma in the merged
version. See http://matroska.org/technical/specs/subtitles/srt.html and
http://devel.aegisub.org/wiki/SubtitleFormats/SRT

											
										
										
											2015-05-12 05:04:54 +00:00
+								            srt_subtitles_timecode(begin_time),
 								            srt_subtitles_timecode(end_time),
-												[ffmpeg] Add dfxp (TTML) subtitles support (#3432, #5146)

											
										
										
											2015-04-25 15:15:05 +00:00
+								            parse_node(para)))
 								    return ''.join(out)
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
def cli_option(params, command_option, param, separator=None):
    """Build the argument list for a command-line option that takes a value.

    @param params          Mapping from which the value is looked up
    @param command_option  The option name to emit, e.g. `--proxy`
    @param param           Key of the value in `params`
    @param separator       If given, option and value are joined with it into
                           a single argument; otherwise they are two arguments
    @returns               A list of arguments; empty if the value is
                           missing or None
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
-												[utils] Generalize cli option converters

											
										
										
											2015-09-04 21:05:11 +00:00
 								def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
 								    param = params.get(param)
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
+								    assert param in (True, False, None)
 								    return cli_option({True: true_value, False: false_value}, command_option, param, separator)
-												[utils] Generalize cli option converters

											
										
										
											2015-09-04 21:05:11 +00:00
 								def cli_valueless_option(params, command_option, param, expected_value=True):
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
+								    return [command_option] if params.get(param) == expected_value else []
-												[utils] Generalize cli option converters

											
										
										
											2015-09-04 21:05:11 +00:00
-												Refactor (See desc)

* Create `FFmpegPostProcessor.real_run_ffmpeg` that can accept multiple input/output files along with switches for each
* Rewrite `cli_configuration_args` and related functions
* Create `YoutubeDL._ensure_dir_exists` - this was previously defined in multiple places

											
										
										
											2021-03-09 02:17:21 +00:00
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the argument list to use from a dict of per-key argument lists.

    @param argdict      Either a dict mapping lower-case keys to argument lists,
                        a plain list/tuple (old format), or None
    @param keys         List/tuple of keys to try in order. Each item may itself
                        be a group of keys (expanded with `variadic`); the
                        arguments of all keys found in a group are concatenated
    @param default      Returned as-is (not copied) when nothing matches
    @param use_compat   Whether an old-format list/tuple `argdict` is returned
                        directly; if False it is treated like None
    @returns            The first non-empty match, else `default`
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        looked_up = [argdict.get(key.lower()) for key in variadic(key_list)]
        found = [args for args in looked_up if args is not None]
        if found:
            # Flatten the per-key lists into a single argument list
            return [arg for args in found for arg in args]
    return default
-												[utils] Generalize cli option converters

											
										
										
											2015-09-04 21:05:11 +00:00
-												[downloader/ffmpeg] Support for DASH manifests (experimental)
Closes #159

											
										
										
											2021-08-24 00:12:45 +00:00
-												[downloader/ffmpeg] Allow passing custom arguments before -i
Closes #686

											
										
										
											2021-08-23 21:45:44 +00:00
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Compute the lookup keys for `main_key`/`exe` and delegate to `cli_configuration_args`.

    The root key is `exe` when it equals `main_key` (both lower-cased),
    otherwise `'<main_key>+<exe>'`. Every item of `keys` (or a single empty
    suffix when `keys` is falsy) is appended to the root key. If the bare root
    key is among the resulting keys, the `(main_key, exe)` group (only when the
    two differ) and `'default'` are added as fallbacks; otherwise `use_compat`
    is forced to False.
    """
    main_key = main_key.lower()
    exe = exe.lower()
    if main_key == exe:
        root_key = exe
    else:
        root_key = f'{main_key}+{exe}'

    suffixes = keys or ['']
    keys = [f'{root_key}{suffix}' for suffix in suffixes]
    if root_key not in keys:
        use_compat = False
    else:
        if main_key != exe:
            keys.append((main_key, exe))
        keys.append('default')
    return cli_configuration_args(argdict, keys, default, use_compat)
-												[utils] Generalize cli option converters

											
										
										
											2015-09-04 21:05:11 +00:00
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
class ISO639Utils:
    """Lookup helpers between two-letter and three-letter language codes."""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    # NB: Insertion order matters: `long2short` returns the first short code
    # whose value matches, so current codes precede the ones they replaced
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pe': 'per',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Map a two-letter language code to its three-letter code

        Only the first two characters of `code` are looked at, so anything
        after them is ignored. Returns None for unknown codes.
        """
        prefix = code[:2]
        return cls._lang_map.get(prefix)

    @classmethod
    def long2short(cls, code):
        """Map a three-letter language code back to a two-letter code

        Returns the first matching entry of the table, or None if there is none.
        """
        matches = (
            short_name
            for short_name, long_name in cls._lang_map.items()
            if long_name == code)
        return next(matches, None)
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
class ISO3166Utils:
    """Lookup helper from two-letter country codes to country names."""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
        # Not ISO 3166 codes, but used for IP blocks
        'AP': 'Asia/Pacific Region',
        'EU': 'Europe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter country code to the corresponding full name

        The lookup is case-insensitive (`code` is upper-cased first).
        Returns None for codes that are not in the table.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								class GeoUtils:
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								    # Major IPv4 address blocks per country
 								    _country_ip_map = {
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'AD': '46.172.224.0/19',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'AE': '94.200.0.0/13',
 								        'AF': '149.54.0.0/17',
 								        'AG': '209.59.64.0/18',
 								        'AI': '204.14.248.0/21',
 								        'AL': '46.99.0.0/16',
 								        'AM': '46.70.0.0/15',
 								        'AO': '105.168.0.0/13',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'AP': '182.50.184.0/21',
 								        'AQ': '23.154.160.0/24',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'AR': '181.0.0.0/12',
 								        'AS': '202.70.112.0/20',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'AT': '77.116.0.0/14',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'AU': '1.128.0.0/11',
 								        'AW': '181.41.0.0/18',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'AX': '185.217.4.0/22',
 								        'AZ': '5.197.0.0/16',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'BA': '31.176.128.0/17',
 								        'BB': '65.48.128.0/17',
 								        'BD': '114.130.0.0/16',
 								        'BE': '57.0.0.0/8',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'BF': '102.178.0.0/15',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'BG': '95.42.0.0/15',
 								        'BH': '37.131.0.0/17',
 								        'BI': '154.117.192.0/18',
 								        'BJ': '137.255.0.0/16',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'BL': '185.212.72.0/23',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'BM': '196.12.64.0/18',
 								        'BN': '156.31.0.0/16',
 								        'BO': '161.56.0.0/16',
 								        'BQ': '161.0.80.0/20',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'BR': '191.128.0.0/12',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'BS': '24.51.64.0/18',
 								        'BT': '119.2.96.0/19',
 								        'BW': '168.167.0.0/16',
 								        'BY': '178.120.0.0/13',
 								        'BZ': '179.42.192.0/18',
 								        'CA': '99.224.0.0/11',
 								        'CD': '41.243.0.0/16',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'CF': '197.242.176.0/21',
 								        'CG': '160.113.0.0/16',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'CH': '85.0.0.0/13',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'CI': '102.136.0.0/14',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'CK': '202.65.32.0/19',
 								        'CL': '152.172.0.0/14',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'CM': '102.244.0.0/14',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'CN': '36.128.0.0/10',
 								        'CO': '181.240.0.0/12',
 								        'CR': '201.192.0.0/12',
 								        'CU': '152.206.0.0/15',
 								        'CV': '165.90.96.0/19',
 								        'CW': '190.88.128.0/17',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'CY': '31.153.0.0/16',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'CZ': '88.100.0.0/14',
 								        'DE': '53.0.0.0/8',
 								        'DJ': '197.241.0.0/17',
 								        'DK': '87.48.0.0/12',
 								        'DM': '192.243.48.0/20',
 								        'DO': '152.166.0.0/15',
 								        'DZ': '41.96.0.0/12',
 								        'EC': '186.68.0.0/15',
 								        'EE': '90.190.0.0/15',
 								        'EG': '156.160.0.0/11',
 								        'ER': '196.200.96.0/20',
 								        'ES': '88.0.0.0/11',
 								        'ET': '196.188.0.0/14',
 								        'EU': '2.16.0.0/13',
 								        'FI': '91.152.0.0/13',
 								        'FJ': '144.120.0.0/16',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'FK': '80.73.208.0/21',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'FM': '119.252.112.0/20',
 								        'FO': '88.85.32.0/19',
 								        'FR': '90.0.0.0/9',
 								        'GA': '41.158.0.0/15',
 								        'GB': '25.0.0.0/8',
 								        'GD': '74.122.88.0/21',
 								        'GE': '31.146.0.0/16',
 								        'GF': '161.22.64.0/18',
 								        'GG': '62.68.160.0/19',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'GH': '154.160.0.0/12',
 								        'GI': '95.164.0.0/16',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'GL': '88.83.0.0/19',
 								        'GM': '160.182.0.0/15',
 								        'GN': '197.149.192.0/18',
 								        'GP': '104.250.0.0/19',
 								        'GQ': '105.235.224.0/20',
 								        'GR': '94.64.0.0/13',
 								        'GT': '168.234.0.0/16',
 								        'GU': '168.123.0.0/16',
 								        'GW': '197.214.80.0/20',
 								        'GY': '181.41.64.0/18',
 								        'HK': '113.252.0.0/14',
 								        'HN': '181.210.0.0/16',
 								        'HR': '93.136.0.0/13',
 								        'HT': '148.102.128.0/17',
 								        'HU': '84.0.0.0/14',
 								        'ID': '39.192.0.0/10',
 								        'IE': '87.32.0.0/12',
 								        'IL': '79.176.0.0/13',
 								        'IM': '5.62.80.0/20',
 								        'IN': '117.192.0.0/10',
 								        'IO': '203.83.48.0/21',
 								        'IQ': '37.236.0.0/14',
 								        'IR': '2.176.0.0/12',
 								        'IS': '82.221.0.0/16',
 								        'IT': '79.0.0.0/10',
 								        'JE': '87.244.64.0/18',
 								        'JM': '72.27.0.0/17',
 								        'JO': '176.29.0.0/16',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'JP': '133.0.0.0/8',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'KE': '105.48.0.0/12',
 								        'KG': '158.181.128.0/17',
 								        'KH': '36.37.128.0/17',
 								        'KI': '103.25.140.0/22',
 								        'KM': '197.255.224.0/20',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'KN': '198.167.192.0/19',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'KP': '175.45.176.0/22',
 								        'KR': '175.192.0.0/10',
 								        'KW': '37.36.0.0/14',
 								        'KY': '64.96.0.0/15',
 								        'KZ': '2.72.0.0/13',
 								        'LA': '115.84.64.0/18',
 								        'LB': '178.135.0.0/16',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'LC': '24.92.144.0/20',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'LI': '82.117.0.0/19',
 								        'LK': '112.134.0.0/15',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'LR': '102.183.0.0/16',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'LS': '129.232.0.0/17',
 								        'LT': '78.56.0.0/13',
 								        'LU': '188.42.0.0/16',
 								        'LV': '46.109.0.0/16',
 								        'LY': '41.252.0.0/14',
 								        'MA': '105.128.0.0/11',
 								        'MC': '88.209.64.0/18',
 								        'MD': '37.246.0.0/16',
 								        'ME': '178.175.0.0/17',
 								        'MF': '74.112.232.0/21',
 								        'MG': '154.126.0.0/17',
 								        'MH': '117.103.88.0/21',
 								        'MK': '77.28.0.0/15',
 								        'ML': '154.118.128.0/18',
 								        'MM': '37.111.0.0/17',
 								        'MN': '49.0.128.0/17',
 								        'MO': '60.246.0.0/16',
 								        'MP': '202.88.64.0/20',
 								        'MQ': '109.203.224.0/19',
 								        'MR': '41.188.64.0/18',
 								        'MS': '208.90.112.0/22',
 								        'MT': '46.11.0.0/16',
 								        'MU': '105.16.0.0/12',
 								        'MV': '27.114.128.0/18',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'MW': '102.70.0.0/15',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'MX': '187.192.0.0/11',
 								        'MY': '175.136.0.0/13',
 								        'MZ': '197.218.0.0/15',
 								        'NA': '41.182.0.0/16',
 								        'NC': '101.101.0.0/18',
 								        'NE': '197.214.0.0/18',
 								        'NF': '203.17.240.0/22',
 								        'NG': '105.112.0.0/12',
 								        'NI': '186.76.0.0/15',
 								        'NL': '145.96.0.0/11',
 								        'NO': '84.208.0.0/13',
 								        'NP': '36.252.0.0/15',
 								        'NR': '203.98.224.0/19',
 								        'NU': '49.156.48.0/22',
 								        'NZ': '49.224.0.0/14',
 								        'OM': '5.36.0.0/15',
 								        'PA': '186.72.0.0/15',
 								        'PE': '186.160.0.0/14',
 								        'PF': '123.50.64.0/18',
 								        'PG': '124.240.192.0/19',
 								        'PH': '49.144.0.0/13',
 								        'PK': '39.32.0.0/11',
 								        'PL': '83.0.0.0/11',
 								        'PM': '70.36.0.0/20',
 								        'PR': '66.50.0.0/16',
 								        'PS': '188.161.0.0/16',
 								        'PT': '85.240.0.0/13',
 								        'PW': '202.124.224.0/20',
 								        'PY': '181.120.0.0/14',
 								        'QA': '37.210.0.0/15',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'RE': '102.35.0.0/16',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'RO': '79.112.0.0/13',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'RS': '93.86.0.0/15',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'RU': '5.136.0.0/13',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'RW': '41.186.0.0/16',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'SA': '188.48.0.0/13',
 								        'SB': '202.1.160.0/19',
 								        'SC': '154.192.0.0/11',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'SD': '102.120.0.0/13',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'SE': '78.64.0.0/12',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'SG': '8.128.0.0/10',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'SI': '188.196.0.0/14',
 								        'SK': '78.98.0.0/15',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'SL': '102.143.0.0/17',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'SM': '89.186.32.0/19',
 								        'SN': '41.82.0.0/15',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'SO': '154.115.192.0/18',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'SR': '186.179.128.0/17',
 								        'SS': '105.235.208.0/21',
 								        'ST': '197.159.160.0/19',
 								        'SV': '168.243.0.0/16',
 								        'SX': '190.102.0.0/20',
 								        'SY': '5.0.0.0/16',
 								        'SZ': '41.84.224.0/19',
 								        'TC': '65.255.48.0/20',
 								        'TD': '154.68.128.0/19',
 								        'TG': '196.168.0.0/14',
 								        'TH': '171.96.0.0/13',
 								        'TJ': '85.9.128.0/18',
 								        'TK': '27.96.24.0/21',
 								        'TL': '180.189.160.0/20',
 								        'TM': '95.85.96.0/19',
 								        'TN': '197.0.0.0/11',
 								        'TO': '175.176.144.0/21',
 								        'TR': '78.160.0.0/11',
 								        'TT': '186.44.0.0/15',
 								        'TV': '202.2.96.0/19',
 								        'TW': '120.96.0.0/11',
 								        'TZ': '156.156.0.0/14',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'UA': '37.52.0.0/14',
 								        'UG': '102.80.0.0/13',
 								        'US': '6.0.0.0/8',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'UY': '167.56.0.0/13',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'UZ': '84.54.64.0/18',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'VA': '212.77.0.0/19',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'VC': '207.191.240.0/21',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'VE': '186.88.0.0/13',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'VG': '66.81.192.0/20',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								        'VI': '146.226.0.0/16',
 								        'VN': '14.160.0.0/11',
 								        'VU': '202.80.32.0/20',
 								        'WF': '117.20.32.0/21',
 								        'WS': '202.4.32.0/19',
 								        'YE': '134.35.0.0/16',
 								        'YT': '41.242.116.0/22',
 								        'ZA': '41.0.0.0/11',
-												[utils] Actualize major IPv4 address blocks per country

											
										
										
											2019-10-28 23:10:20 +00:00
+								        'ZM': '102.144.0.0/13',
 								        'ZW': '102.177.192.0/18',
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
+								    }
 								    @classmethod
 								    def random_ipv4(cls, code_or_block):
 								        """Return a random dotted-quad IPv4 address taken from an address block

 								        @param code_or_block    either a 2-character key that is upper-cased and
 								                                looked up in cls._country_ip_map, or a block in
 								                                "address/prefix-length" notation
 								        @returns                the address as a string, or None when the
 								                                2-character key has no entry in the map
 								        """
 								        cidr = code_or_block
 								        if len(code_or_block) == 2:
 								            cidr = cls._country_ip_map.get(code_or_block.upper())
 								            if not cidr:
 								                return None
 								        network, prefix_length = cidr.split('/')
 								        lowest, = struct.unpack('!L', socket.inet_aton(network))
 								        # OR-ing in the host bits yields the upper bound of the range to draw from
 								        highest = lowest | (0xffffffff >> int(prefix_length))
 								        packed = struct.pack('!L', random.randint(lowest, highest))
 								        return str(socket.inet_ntoa(packed))
-												Add experimental geo restriction bypass mechanism
Based on faking X-Forwarded-For HTTP header

											
										
										
											2017-02-04 11:49:58 +00:00
-												[utils] Add bytes_to_long() and long_to_bytes()

Used in daisuki.net (#4738)

Both are adapted from public domain PyCrypto:
https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py

											
										
										
											2017-02-28 11:16:55 +00:00
+								# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
 								# released into Public Domain
 								# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
 								def long_to_bytes(n, blocksize=0):
 								    """long_to_bytes(n:long, blocksize:int) : string
 								    Convert a long integer to a big-endian byte string.
 								    Zero and negative values both produce a single zero byte before padding.
 								    If optional blocksize is given and greater than zero, pad the front of the
 								    byte string with binary zeros so that the length is a multiple of
 								    blocksize.
 								    """
 								    n = int(n)
 								    words = []
 								    while n > 0:
 								        # peel off the lowest 32 bits at a time
 								        words.append(struct.pack('>I', n & 0xffffffff))
 								        n >>= 32
 								    # words were collected least significant first
 								    s = b''.join(reversed(words))
 								    # drop leading zeros, but always keep at least one byte
 								    s = s.lstrip(b'\000') or b'\000'
 								    remainder = len(s) % blocksize if blocksize > 0 else 0
 								    if remainder:
 								        s = b'\000' * (blocksize - remainder) + s
 								    return s
 								def bytes_to_long(s):
 								    """bytes_to_long(string) : long
 								    Convert a byte string to a long integer, reading it as big-endian.
 								    An empty byte string gives 0.
 								    This is (essentially) the inverse of long_to_bytes().
 								    """
 								    return int.from_bytes(s, 'big')
-												[utils] Add OHDave's RSA encryption function

											
										
										
											2016-02-16 22:01:44 +00:00
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Encrypt one block with OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # The bytes are reversed before being read as one big hexadecimal number
    reversed_hex = binascii.hexlify(data[::-1])
    ciphertext = pow(int(reversed_hex, 16), exponent, modulus)
    return format(ciphertext, 'x')
-												[utils] Move base62 to utils

											
										
										
											2016-02-24 14:08:40 +00:00
-												[utils] Add pkcs1pad

Used in daisuki.net (#4738)

											
										
										
											2017-02-27 10:50:19 +00:00
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout [0, 2, <non-zero padding>, 0, <data>].

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data
    @raises ValueError         if data is longer than length - 11
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding must not contain zeros: the first zero after the [0, 2] header
    # is what separates the padding from the data, so a stray zero would cut
    # into the padding and corrupt the recovered message.
    # NOTE: `random` is not a cryptographically secure source of randomness
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2, *pseudo_random, 0, *data]
-												[utils] Add pkcs1pad

Used in daisuki.net (#4738)

											
										
										
											2017-02-27 10:50:19 +00:00
-												[cleanup] Misc

											
										
										
											2022-06-20 06:14:55 +00:00
+								def _base_n_table(n, table):
 								    if not table and not n:
 								        raise ValueError('Either table or n must be specified')
-												Bugfix for 7b2c3f47c6b586a208655fcfc716bba3f8619d1e

											
										
										
											2022-06-20 06:25:54 +00:00
+								    table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
-												Fix bug in 612f2be5d3924540158dfbe5f25d841f04cff8c6

											
										
										
											2022-06-30 12:59:39 +00:00
+								    if n and n != len(table):
-												Bugfix for 7b2c3f47c6b586a208655fcfc716bba3f8619d1e

											
										
										
											2022-06-20 06:25:54 +00:00
+								        raise ValueError(f'base {n} exceeds table length {len(table)}')
 								    return table
-												[utils] Merge base_n functions

											
										
										
											2016-02-26 06:37:20 +00:00
-												[utils] Multiple changes to base_n()

1. Renamed to encode_base_n()
2. Allow tables longer than 62 characters
3. Raise ValueError instead of AssertionError for invalid input data
4. Return the first character in the table instead of '0' for number 0
5. Add tests

											
										
										
											2016-02-26 19:19:50 +00:00
-												[cleanup] Misc

											
										
										
											2022-06-20 06:14:55 +00:00
def encode_base_n(num, n=None, table=None):
    """Convert given int to a base-n string

    @param num      non-negative integer to convert
    @param n        base to use; defaults to the length of the table
    @param table    characters to use as digits, see _base_n_table
    @returns        the encoded string; a falsy num gives the first table character
    @raises ValueError  if num is negative, if the table has fewer than two
                        characters for a non-zero num, or if _base_n_table
                        rejects n and table
    """
    table = _base_n_table(n, table)
    if not num:
        return table[0]

    base = len(table)
    # Floor division never brings a negative number to zero, and dividing by a
    # base of one never shrinks the number, so either would loop forever below
    if num < 0:
        raise ValueError(f'cannot encode negative number {num}')
    if base < 2:
        raise ValueError(f'cannot encode {num} with a table of length {base}')

    digits = []
    while num:
        num, remainder = divmod(num, base)
        digits.append(table[remainder])
    # digits were produced least significant first
    return ''.join(reversed(digits))
 								def decode_base_n(string, n=None, table=None):
 								    """Convert given base-n string to int

 								    The digit table is obtained from _base_n_table(n, table); a character
 								    that is not in the table raises KeyError.
 								    """
 								    digits = _base_n_table(n, table)
 								    digit_values = dict(zip(digits, range(len(digits))))
 								    base = len(digit_values)
 								    number = 0
 								    for digit in string:
 								        number = number * base + digit_values[digit]
 								    return number
-												[utils] Move codes for handling eval() from iqiyi.py

											
										
										
											2016-02-26 06:58:29 +00:00
def decode_packed_codes(code):
    """Expand the packed code that PACKED_CODES_RE finds in the given string

    Every word in the obfuscated code is replaced through a table that maps
    the base-n encoding of an index to the symbol at that index; an empty
    symbol maps the word to itself.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    radix = int(base)
    remaining = int(count)
    words = symbols.split('|')

    replacements = {}
    while remaining:
        remaining -= 1
        token = encode_base_n(remaining, radix)
        replacements[token] = words[remaining] or token

    def substitute(word_match):
        return replacements[word_match.group(0)]

    return re.sub(r'\b(\w+)\b', substitute, obfuscated_code)
-												[downloader/hls] Add support for AES-128 encrypted segments in hlsnative downloader

											
										
										
											2016-01-10 19:09:53 +00:00
-												[utils] Add generic caesar cipher and rot47

											
										
										
											2019-11-26 19:26:42 +00:00
def caesar(s, alphabet, shift):
    """Shift every character of s that occurs in alphabet by shift positions

    The shift wraps around the end of the alphabet. Characters that are not
    part of the alphabet are kept as they are, and a shift of 0 returns s
    unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(char):
        if char not in alphabet:
            return char
        return alphabet[(alphabet.index(char) + shift) % size]

    return ''.join(map(rotate, s))
 								def rot47(s):
 								    """Apply caesar() with a shift of 47 over the characters from '!' to '~'"""
 								    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
 								    return caesar(s, printable, 47)
-												[downloader/hls] Add support for AES-128 encrypted segments in hlsnative downloader

											
										
										
											2016-01-10 19:09:53 +00:00
def parse_m3u8_attributes(attrib):
    """Parse a comma-separated KEY=value attribute list into a dict

    Values may be quoted, in which case they can contain commas and the
    surrounding quotes are removed. When a key occurs more than once, the
    last value is kept.
    """
    pairs = re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib)
    return {
        key: val[1:-1] if val.startswith('"') else val
        for key, val in pairs
    }
-												[utils] Add urshift()

Used in IqiyiIE and LeIE

											
										
										
											2016-06-26 07:16:49 +00:00
 								def urshift(val, n):
 								    """Shift val right by n bits, adding 2**32 to negative values first"""
 								    if val < 0:
 								        val += 0x100000000
 								    return val >> n
-												[utils] Add decode_png for openload (#9706)

											
										
										
											2016-08-06 18:42:58 +00:00
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
+								def write_xattr(path, key, value):
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								    # Windows: Write xattrs to NTFS Alternate Data Streams:
 								    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
 								    if compat_os_name == 'nt':
 								        assert ':' not in key
 								        assert os.path.exists(path)
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
 								        try:
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								            with open(f'{path}:{key}', 'wb') as f:
 								                f.write(value)
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								        except OSError as e:
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
+								            raise XAttrMetadataError(e.errno, e.strerror)
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								        return
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
-												[utils] `write_xattr`: Use `os.setxattr` if available (#8205)

Closes #8193
Authored by: bashonly, Grub4K

Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2023-10-09 18:30:36 +00:00
+								    # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								    setxattr = None
-												[utils] `write_xattr`: Use `os.setxattr` if available (#8205)

Closes #8193
Authored by: bashonly, Grub4K

Co-authored-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2023-10-09 18:30:36 +00:00
+								    if callable(getattr(os, 'setxattr', None)):
 								        setxattr = os.setxattr
 								    elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								        # Unicode arguments are not supported in pyxattr until version 0.5.0
 								        # See https://github.com/ytdl-org/youtube-dl/issues/5498
 								        if version_tuple(xattr.__version__) >= (0, 5, 0):
 								            setxattr = xattr.set
 								    elif xattr:
 								        setxattr = xattr.setxattr
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								    if setxattr:
 								        try:
 								            setxattr(path, key, value)
 								        except OSError as e:
 								            raise XAttrMetadataError(e.errno, e.strerror)
 								        return
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								    # UNIX Method 2. Use setfattr/xattr executables
 								    exe = ('setfattr' if check_executable('setfattr', ['--version'])
 								           else 'xattr' if check_executable('xattr', ['-h']) else None)
 								    if not exe:
 								        raise XAttrUnavailableError(
-												[docs] Misc Cleanup (#8977)

Closes #8355, #8944

Authored by: bashonly, Grub4k, Arthurszzz, seproDev, pukkandan

Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
Co-authored-by: bashonly <bashonly@protonmail.com>
Co-authored-by: Arthurszzz <minecraftgamerarthur@gmail.com>
Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>
Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
											
										
										
											2024-03-10 19:18:47 +00:00
+								            'Couldn\'t find a tool to set the xattrs. Install either the "xattr" or "pyxattr" Python modules or the '
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))
-												Move write_xattr to utils.py

There are some other places that use xattr functions. It's better to
move it to a common place so that others can use it.

											
										
										
											2016-09-29 16:28:32 +00:00
-												[cleanup] Minor fixes (See desc)

* [youtube] Fix `--youtube-skip-dash-manifest`
* [build] Use `$()` in `Makefile`. Closes #3684
* Fix bug in 385ffb467b2285e85a2a5495b90314ba1f8e0700
* Fix bug in 43d7f5a5d0c77556156a3f8caa6976d3908a1e38
* [cleanup] Remove unnecessary `utf-8` from `str.encode`/`bytes.decode`
* [utils] LazyList: Expose unnecessarily "protected" attributes
and other minor cleanup

											
										
										
											2022-05-09 11:54:28 +00:00
+								    value = value.decode()
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								    try:
-												[utils] `Popen`: Refactor to use contextmanager

Fixes https://github.com/yt-dlp/yt-dlp/issues/3531#issuecomment-1156223597

											
										
										
											2022-06-15 20:55:43 +00:00
+								        _, stderr, returncode = Popen.run(
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								            [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
-												[cleanup] Minor fixes (#4096)

Authored by: christoph-heinrich
											
										
										
											2022-06-18 01:57:22 +00:00
+								            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
-												[XAttrMetadata] Refactor and document dependencies

											
										
										
											2022-04-30 23:16:05 +00:00
+								    except OSError as e:
 								        raise XAttrMetadataError(e.errno, e.strerror)
-												[utils] `Popen`: Refactor to use contextmanager

Fixes https://github.com/yt-dlp/yt-dlp/issues/3531#issuecomment-1156223597

											
										
										
											2022-06-15 20:55:43 +00:00
+								    if returncode:
 								        raise XAttrMetadataError(returncode, stderr)
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
 								def random_birthday(year_field, month_field, day_field):
-												[cleanup] Standardize `import datetime as dt` (#8978)

											
										
										
											2024-02-25 00:16:34 +00:00
+								    start_date = dt.date(1950, 1, 1)
 								    end_date = dt.date(1995, 12, 31)
-												[utils] Fix random_birthday to generate existing dates only


											
										
										
											2018-12-01 17:05:15 +00:00
+								    offset = random.randint(0, (end_date - start_date).days)
-												[cleanup] Standardize `import datetime as dt` (#8978)

											
										
										
											2024-02-25 00:16:34 +00:00
+								    random_date = start_date + dt.timedelta(offset)
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
+								    return {
-												[utils] Fix random_birthday to generate existing dates only


											
										
										
											2018-12-01 17:05:15 +00:00
+								        year_field: str(random_date.year),
 								        month_field: str(random_date.month),
 								        day_field: str(random_date.day),
-												[cda] Implement birthday verification (closes #12789)

											
										
										
											2017-05-01 15:09:18 +00:00
+								    }
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
-												Preparing for release

											
										
										
											2021-01-07 06:41:05 +00:00
-												[downloader/aria2c] Native progress for aria2c via RPC (#3724)

Authored by: Lesmiscore, pukkandan

Closes #2038
											
										
										
											2023-01-01 17:16:25 +00:00
def find_available_port(interface=''):
    """
    Ask the operating system for a currently free port on `interface`.

    A throwaway socket is bound to port 0, which lets the OS choose the port;
    the socket is closed again before returning, so the port is only known to
    have been free at the time of the call.

    @param interface  address to bind to; the empty string is passed through
                      to `socket.bind` unchanged
    @returns          the chosen port number, or None if creating or binding
                      the socket failed with an OSError
    """
    try:
        with socket.socket() as probe:
            address = (interface, 0)
            probe.bind(address)
            bound = probe.getsockname()
            return bound[1]
    except OSError:
        return None
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
# Templates for internet shortcut files, which are plain text files.
# Each template carries `%(name)s` placeholders meant to be filled in with
# %-formatting against a mapping.

# Windows `.url` shortcut
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# macOS `.webloc` shortcut (XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# freedesktop `.desktop` link entry
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Lookup of the templates above by link type
LINK_TEMPLATES = dict(
    url=DOT_URL_LINK_TEMPLATE,
    desktop=DOT_DESKTOP_LINK_TEMPLATE,
    webloc=DOT_WEBLOC_LINK_TEMPLATE,
)
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
 								def iri_to_uri(iri):
 								    """
 								    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
 								    The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact.
 								    """
-												[compat] Remove deprecated functions from core code

											
										
										
											2022-06-24 10:54:43 +00:00
+								    iri_parts = urllib.parse.urlparse(iri)
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
 								    if '[' in iri_parts.netloc:
 								        raise ValueError('IPv6 URIs are not, yet, supported.')
 								        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.
 								    # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
 								    net_location = ''
 								    if iri_parts.username:
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								        net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
+								        if iri_parts.password is not None:
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								            net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
+								        net_location += '@'
-												[cleanup] Minor fixes (See desc)

* [youtube] Fix `--youtube-skip-dash-manifest`
* [build] Use `$()` in `Makefile`. Closes #3684
* Fix bug in 385ffb467b2285e85a2a5495b90314ba1f8e0700
* Fix bug in 43d7f5a5d0c77556156a3f8caa6976d3908a1e38
* [cleanup] Remove unnecessary `utf-8` from `str.encode`/`bytes.decode`
* [utils] LazyList: Expose unnecessarily "protected" attributes
and other minor cleanup

											
										
										
											2022-05-09 11:54:28 +00:00
+								    net_location += iri_parts.hostname.encode('idna').decode()  # Punycode for Unicode hostnames.
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
+								    # The 'idna' encoding produces ASCII text.
 								    if iri_parts.port is not None and iri_parts.port != 80:
 								        net_location += ':' + str(iri_parts.port)
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								    return urllib.parse.urlunparse(
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
+								        (iri_parts.scheme,
 								            net_location,
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								            urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"),
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
 								            # Unsure about the `safe` argument, since this is a legacy way of handling parameters.
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								            urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"),
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
 								            # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								            urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
-												[cleanup] Mark some compat variables for removal (#2173)

Authored by fstirlitz, pukkandan

											
										
										
											2022-04-11 20:09:26 +00:00
+								            urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~")))
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
 								    # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
 								def to_high_limit_path(path):
 								    if sys.platform in ['win32', 'cygwin']:
 								        # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
-												[cleanup] Misc cleanup (#2173)

Authored by: fstirlitz, pukkandan

											
										
										
											2022-04-12 00:01:54 +00:00
+								        return '\\\\?\\' + os.path.abspath(path)
-												Add --write-*-link by h-h-h-h

Authored-by: h-h-h-h

											
										
										
											2020-10-27 10:37:21 +00:00
 								    return path
-												Option to present -F output to a more tabular form

											
										
										
											2020-12-13 14:29:09 +00:00
-												Preparing for release

											
										
										
											2021-01-07 06:41:05 +00:00
-												[cleanup] Misc

											
										
										
											2022-06-20 06:14:55 +00:00
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
    """
    Look up a value in `obj` and render it through a %-format template.

    @param obj       object to read the value from
    @param field     path (or several alternative paths) handed to
                     `traversal.traverse_obj`
    @param template  %-format string applied to the processed value
    @param ignore    value(s) for which `default` is returned instead; when
                     not given, any falsy value is ignored
    @param default   result when the value is ignored
    @param func      transformation applied to the value before formatting
    @returns         `template % func(value)`, or `default` if ignored
    """
    value = traversal.traverse_obj(obj, *variadic(field))

    if ignore is NO_DEFAULT:
        skip = not value
    else:
        skip = value in variadic(ignore)

    return default if skip else template % func(value)
-												Update to ytdl-2021.01.08

											
										
										
											2021-01-08 16:14:50 +00:00
 								def clean_podcast_url(url):
-												[utils] clean_podcast_url: Handle protocol in redirect URL

Closes #7430

											
										
										
											2023-06-26 10:49:49 +00:00
+								    url = re.sub(r'''(?x)
-												Update to ytdl-2021.01.08

											
										
										
											2021-01-08 16:14:50 +00:00
+								        (?:
 								            (?:
 								                chtbl\.com/track|
 								                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
-												[utils] `clean_podcast_url`: Handle more trackers (#7556)

Authored by: mabdelfattah, bashonly
Closes #7544 
											
										
										
											2023-07-11 01:00:38 +00:00
+								                play\.podtrac\.com|
 								                chrt\.fm/track|
 								                mgln\.ai/e
 								            )(?:/[^/.]+)?|
-												Update to ytdl-2021.01.08

											
										
										
											2021-01-08 16:14:50 +00:00
+								            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
 								            flex\.acast\.com|
 								            pd(?:
 								                cn\.co| # https://podcorn.com/analytics-prefix/
 								                st\.fm # https://podsights.com/docs/
-												[utils] `clean_podcast_url`: Handle more trackers (#7556)

Authored by: mabdelfattah, bashonly
Closes #7544 
											
										
										
											2023-07-11 01:00:38 +00:00
+								            )/e|
 								            [0-9]\.gum\.fm|
 								            pscrb\.fm/rss/p
-												Update to ytdl-2021.01.08

											
										
										
											2021-01-08 16:14:50 +00:00
+								        )/''', '', url)
-												[utils] clean_podcast_url: Handle protocol in redirect URL

Closes #7430

											
										
										
											2023-06-26 10:49:49 +00:00
+								    return re.sub(r'^\w+://(\w+://)', r'\1', url)
-												#30 [mildom] Add extractor

Authored by @nao20010128nao
											
										
										
											2021-01-22 13:43:30 +00:00
 								_HEX_TABLE = '0123456789abcdef'
 								def random_uuidv4():
 								    return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
-												#29 New option `-P`/`--paths` to give different paths for different types of files

Syntax: `-P "type:path" -P "type:path"`
Types: home, temp, description, annotation, subtitle, infojson, thumbnail

											
										
										
											2021-01-23 12:18:12 +00:00
 								def make_dir(path, to_screen=None):
 								    try:
 								        dn = os.path.dirname(path)
-												[utils] Fix race condition in `make_dir` (#6089)

Authored by: aionescu
											
										
										
											2023-02-17 03:29:32 +00:00
+								        if dn:
 								            os.makedirs(dn, exist_ok=True)
-												#29 New option `-P`/`--paths` to give different paths for different types of files

Syntax: `-P "type:path" -P "type:path"`
Types: home, temp, description, annotation, subtitle, infojson, thumbnail

											
										
										
											2021-01-23 12:18:12 +00:00
+								        return True
-												[cleanup] Upgrade syntax

Using https://github.com/asottile/pyupgrade

1. `__future__` imports and `coding: utf-8` were removed
2. Files were rewritten with `pyupgrade --py36-plus --keep-percent-format`
3. f-strings were cherry-picked from `pyupgrade --py36-plus`

Extractors are left untouched (except removing header) to avoid unnecessary merge conflicts

											
										
										
											2022-04-11 15:10:28 +00:00
+								    except OSError as err:
-												#29 New option `-P`/`--paths` to give different paths for different types of files

Syntax: `-P "type:path" -P "type:path"`
Types: home, temp, description, annotation, subtitle, infojson, thumbnail

											
										
										
											2021-01-23 12:18:12 +00:00
+								        if callable(to_screen) is not None:
-												[cleanup, utils] Split into submodules (#7090)

Closes https://github.com/yt-dlp/yt-dlp/pull/2173

Authored by: pukkandan, coletdjnz
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
											
										
										
											2023-05-20 21:56:23 +00:00
+								            to_screen(f'unable to create directory {err}')
-												#29 New option `-P`/`--paths` to give different paths for different types of files

Syntax: `-P "type:path" -P "type:path"`
Types: home, temp, description, annotation, subtitle, infojson, thumbnail

											
										
										
											2021-01-23 12:18:12 +00:00
+								        return False
-												Plugin support

Extractor plugins are loaded from <root-dir>/ytdlp_plugins/extractor/__init__.py

Inspired by https://github.com/un-def/dl-plus

:ci skip dl

											
										
										
											2021-01-24 13:40:02 +00:00
 								def get_executable_path():
-												[cleanup, utils] Split into submodules (#7090)

Closes https://github.com/yt-dlp/yt-dlp/pull/2173

Authored by: pukkandan, coletdjnz
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
											
										
										
											2023-05-20 21:56:23 +00:00
+								    from ..update import _get_variant_and_executable_path
-												[cleanup] Misc

											
										
										
											2022-04-17 17:18:50 +00:00
-												[build, cleanup] Refactor

Closes #3835, #3837

											
										
										
											2022-05-22 11:37:18 +00:00
+								    return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
-												Plugin support

Extractor plugins are loaded from <root-dir>/ytdlp_plugins/extractor/__init__.py

Inspired by https://github.com/un-def/dl-plus

:ci skip dl

											
										
										
											2021-01-24 13:40:02 +00:00
-												Improve plugin architecture (#5553)

to make plugins easier to develop and use:
* Plugins are now loaded as namespace packages.
* Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.).
* Plugin packages can be installed and managed via pip, or dropped into any of the documented locations.
* Users do not need to edit any code files to install plugins.
* Backwards-compatible with previous plugin architecture.

As a side-effect, yt-dlp will now search in a few more locations for config files.

Closes https://github.com/yt-dlp/yt-dlp/issues/1389

Authored by: flashdagger, coletdjnz, pukkandan, Grub4K
Co-authored-by: Marcel <flashdagger@googlemail.com>
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>
											
										
										
											2023-01-01 04:29:22 +00:00
def get_user_config_dirs(package_name):
    """
    Yield the per-user directories in which `package_name` may keep its config.

    The candidates are produced lazily, in this order:
      1. `$XDG_CONFIG_HOME/package_name` (falling back to `~/.config`)
      2. `%APPDATA%/package_name`, only if the `appdata` variable is set
      3. `~/.package_name`

    The directories are not checked for existence.
    """
    # .config (e.g. ~/.config/package_name)
    xdg_root = os.getenv('XDG_CONFIG_HOME')
    if not xdg_root:
        xdg_root = compat_expanduser('~/.config')
    yield os.path.join(xdg_root, package_name)

    # appdata (%APPDATA%/package_name)
    appdata_root = os.getenv('appdata')
    if appdata_root:
        yield os.path.join(appdata_root, package_name)

    # home (~/.package_name)
    home = compat_expanduser('~')
    yield os.path.join(home, '.{}'.format(package_name))
-												Improve plugin architecture (#5553)

to make plugins easier to develop and use:
* Plugins are now loaded as namespace packages.
* Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.).
* Plugin packages can be installed and managed via pip, or dropped into any of the documented locations.
* Users do not need to edit any code files to install plugins.
* Backwards-compatible with previous plugin architecture.

As a side-effect, yt-dlp will now search in a few more locations for config files.

Closes https://github.com/yt-dlp/yt-dlp/issues/1389

Authored by: flashdagger, coletdjnz, pukkandan, Grub4K
Co-authored-by: Marcel <flashdagger@googlemail.com>
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
Co-authored-by: Simon Sawicki <accounts@grub4k.xyz>
											
										
										
											2023-01-01 04:29:22 +00:00
 								def get_system_config_dirs(package_name):
 								    # /etc/package_name
-												Fix config locations (#5933)

Bug in 8e40b9d1ec132ae1bcac50b3ee520ece46ac9c55
Closes #5953

Authored by: Grub4k, coletdjnz, pukkandan
											
										
										
											2023-01-06 19:01:00 +00:00
+								    yield os.path.join('/etc', package_name)
-												#31 Features from animelover1984/youtube-dl

* Add `--get-comments`
* [youtube] Extract comments
* [billibilli] Added BiliBiliSearchIE, BilibiliChannelIE
* [billibilli] Extract comments
* [billibilli] Better video extraction
* Write playlist data to infojson
* [FFmpegMetadata] Embed infojson inside the video
* [EmbedThumbnail] Try embedding in mp4 using ffprobe and `-disposition`
* [EmbedThumbnail] Treat mka like mkv and mov like mp4
* [EmbedThumbnail] Embed in ogg/opus
* [VideoRemuxer] Conditionally remux video
* [VideoRemuxer] Add `-movflags +faststart` when remuxing from mp4
* [ffmpeg] Print entire stderr in verbose when there is error
* [EmbedSubtitle] Warn when embedding ass in mp4
* [avanto] Use NFLTokenGenerator if possible
											
										
										
											2021-01-27 15:02:51 +00:00
-												[AbemaTV] Add extractors (#1688)

Authored by: Lesmiscore
											
										
										
											2022-02-25 02:14:04 +00:00
def time_seconds(**kwargs):
    """
    Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)

    The current time is shifted by the offset described by `kwargs`, which
    are passed to `datetime.timedelta` as is (e.g. `hours=9`). Without any
    arguments this is the same as `time.time()`.
    """
    now = time.time()
    offset = dt.timedelta(**kwargs)
    return now + offset.total_seconds()
-												[AbemaTV] Add extractors (#1688)

Authored by: Lesmiscore
											
										
										
											2022-02-25 02:14:04 +00:00
-												[atv.at] Use jwt for API (#1012)

The jwt token is implemented according to RFC7519

Closes #988
Authored by: NeroBurner
											
										
										
											2021-09-23 17:40:51 +00:00
+								# create a JSON Web Signature (jws) with HS256 algorithm
 								# the resulting format is in JWS Compact Serialization
 								# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
 								# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
 								def jwt_encode_hs256(payload_data, key, headers={}):
 								    header_data = {
 								        'alg': 'HS256',
 								        'typ': 'JWT',
 								    }
 								    if headers:
 								        header_data.update(headers)
-												[cleanup] Minor fixes (See desc)

* [youtube] Fix `--youtube-skip-dash-manifest`
* [build] Use `$()` in `Makefile`. Closes #3684
* Fix bug in 385ffb467b2285e85a2a5495b90314ba1f8e0700
* Fix bug in 43d7f5a5d0c77556156a3f8caa6976d3908a1e38
* [cleanup] Remove unnecessary `utf-8` from `str.encode`/`bytes.decode`
* [utils] LazyList: Expose unnecessarily "protected" attributes
and other minor cleanup

											
										
										
											2022-05-09 11:54:28 +00:00
+								    header_b64 = base64.b64encode(json.dumps(header_data).encode())
 								    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
 								    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
-												[atv.at] Use jwt for API (#1012)

The jwt token is implemented according to RFC7519

Closes #988
Authored by: NeroBurner
											
										
										
											2021-09-23 17:40:51 +00:00
+								    signature_b64 = base64.b64encode(h.digest())
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
-												Improved progress reporting (See desc) (#1125)

* Separate `--console-title` and `--no-progress`
* Add option `--progress` to show progress-bar even in quiet mode
* Fix and refactor `minicurses`
* Use `minicurses` for all progress reporting
* Standardize use of terminal sequences and enable color support for windows 10
* Add option `--progress-template` to customize progress-bar and console-title
* Add postprocessor hooks and progress reporting

Closes: #906, #901, #1085, #1170
											
										
										
											2021-10-08 19:11:59 +00:00
-												[utils] Add `jwt_decode_hs256`
Code from #1340
Authored by: Ashish0804

											
										
										
											2021-10-27 20:37:15 +00:00
+								# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
 								def jwt_decode_hs256(jwt):
 								    header_b64, payload_b64, signature_b64 = jwt.split('.')
-												[extractors/podbayfm] Add extractor (#4971)

Authored by: schnusch
											
										
										
											2022-10-10 20:31:01 +00:00
+								    # add trailing ='s that may have been stripped, superfluous ='s are ignored
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    return json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
-												[utils] Add `jwt_decode_hs256`
Code from #1340
Authored by: Ashish0804

											
										
										
											2021-10-27 20:37:15 +00:00
-												[utils] Fix bug in 0b9c08b47bb5e95c21b067044ace4e824d19a9c2

* Cache of `supports_terminal_sequences` must be reset after enabling VT mode
* and move `windows_enable_vt_mode` to utils to avoid cyclic imports

											
										
										
											2022-05-19 21:32:25 +00:00
# Tri-state flag: None when not running on Windows, False on Windows until
# `windows_enable_vt_mode` has succeeded, True afterwards
WINDOWS_VT_MODE = None if compat_os_name != 'nt' else False


@functools.cache
def supports_terminal_sequences(stream):
    """
    Tell whether terminal escape sequences may be written to `stream`

    The answer is memoized per `stream`; `windows_enable_vt_mode` clears
    that cache after switching the console mode.

    @returns False on Windows while WINDOWS_VT_MODE is not set,
             False elsewhere when the TERM environment variable is empty/unset,
             otherwise the result of `stream.isatty()` (False if that call fails)
    """
    on_windows = compat_os_name == 'nt'
    if on_windows and not WINDOWS_VT_MODE:
        return False
    if not on_windows and not os.getenv('TERM'):
        return False
    try:
        is_terminal = stream.isatty()
    except BaseException:
        # Any failure while probing the stream counts as "not a terminal"
        return False
    return is_terminal
-												[utils] windows_enable_vt_mode: Proper implementation

Authored by: Grub4K

											
										
										
											2022-12-04 19:36:37 +00:00
def windows_enable_vt_mode():
    """
    Turn on virtual terminal processing for the Windows console

    Ref: https://bugs.python.org/issue30075

    Does nothing when `get_windows_version()` reports less than (10, 0, 10586).
    On success, sets WINDOWS_VT_MODE and resets the cache of
    `supports_terminal_sequences`.

    @raises Exception if GetConsoleMode or SetConsoleMode reports failure
    """
    global WINDOWS_VT_MODE

    if get_windows_version() < (10, 0, 10586):
        return

    import ctypes
    import ctypes.wintypes
    import msvcrt

    ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004

    kernel32 = ctypes.WinDLL('kernel32', use_last_error=False)
    conout_fd = os.open('CONOUT$', os.O_RDWR)
    try:
        console = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(conout_fd))
        current_mode = ctypes.wintypes.DWORD()
        if not kernel32.GetConsoleMode(console, ctypes.byref(current_mode)):
            raise Exception('GetConsoleMode failed')

        # Keep every flag that is already set and add the VT one on top
        wanted_mode = ctypes.wintypes.DWORD(
            current_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING)
        if not kernel32.SetConsoleMode(console, wanted_mode):
            raise Exception('SetConsoleMode failed')
    finally:
        os.close(conout_fd)

    WINDOWS_VT_MODE = True
    # Earlier answers were computed while VT mode was still off
    supports_terminal_sequences.cache_clear()
-												[utils] Fix bug in 0b9c08b47bb5e95c21b067044ace4e824d19a9c2

* Cache of `supports_terminal_sequences` must be reset after enabling VT mode
* and move `windows_enable_vt_mode` to utils to avoid cyclic imports

											
										
										
											2022-05-19 21:32:25 +00:00
-												[minicurses] Add more colors

											
										
										
											2021-10-20 16:37:32 +00:00
# ESC "[" followed by one or more non-"m" characters and a closing "m"
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with every match of `_terminal_sequences_re` removed"""
    cleaned = _terminal_sequences_re.sub('', string)
    return cleaned
 								def number_of_digits(number):
 								    return len('%d' % number)
-												[utils] Add `join_nonempty`

											
										
										
											2021-11-06 01:05:24 +00:00
 								def join_nonempty(*values, delim='-', from_dict=None):
 								    if from_dict is not None:
-												[cleanup, utils] Split into submodules (#7090)

Closes https://github.com/yt-dlp/yt-dlp/pull/2173

Authored by: pukkandan, coletdjnz
Co-authored-by: pukkandan <pukkandan.ytdlp@gmail.com>
											
										
										
											2023-05-20 21:56:23 +00:00
+								        values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
-												[utils] Add `join_nonempty`

											
										
										
											2021-11-06 01:05:24 +00:00
+								    return delim.join(map(str, filter(None, values)))
-												Allow multiple and nested configuration files

											
										
										
											2021-12-14 17:03:47 +00:00
-												[ant1newsgr] Add extractor (#1982)

Authored by: zmousm
											
										
										
											2022-03-04 21:52:48 +00:00
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest format dimensions in terms of video width and, for each thumbnail:
    * Modify the URL: Match the width with the provided regex and replace with the former width
    * Update dimensions

    This function is useful with video services that scale the provided thumbnails on demand

    @returns `thumbnails` itself when no format has a (non-zero) width,
             otherwise a new list of merged thumbnail dicts
    """
    _keys = ('width', 'height')
    # Missing/empty dimensions count as 0; tuples compare by width first, then height
    candidates = [tuple(fmt.get(k) or 0 for k in _keys) for fmt in formats]
    max_dimensions = max(candidates, default=(0, 0))
    max_width = max_dimensions[0]
    if not max_width:
        return thumbnails

    rescaled = []
    for thumbnail in thumbnails:
        scaled_url = re.sub(url_width_re, str(max_width), thumbnail['url'])
        rescaled.append(merge_dicts(
            {'url': scaled_url},
            dict(zip(_keys, max_dimensions)), thumbnail))
    return rescaled
-												[downloader/fragment] Fix bugs around resuming with Range (#2901)

Authored by: Lesmiscore
											
										
										
											2022-02-28 04:10:54 +00:00
def parse_http_range(range):
    """
    Parse value of "Range" or "Content-Range" HTTP header into tuple.

    @returns (start, end, total); end and total are None when they are absent,
             and all three are None if the value is empty or not understood
    """
    nothing = (None, None, None)
    if not range:
        return nothing
    found = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if found is None:
        return nothing
    start, end, total = found.groups()
    return int(start), int_or_none(end), int_or_none(total)
-												`--config-location -` to provide options interactively

											
										
										
											2022-05-24 12:00:28 +00:00
def read_stdin(what):
    """
    Return the STDIN stream, announcing first what is about to be read

    @param what     Description used in the notice; nothing is printed if it is falsy
    """
    if not what:
        return sys.stdin
    eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
    write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
    return sys.stdin
-												Allow users to specify encoding in each config files (#4357)

Authored by: Lesmiscore
											
										
										
											2022-07-15 11:52:14 +00:00
def determine_file_encoding(data):
    """
    Detect the text encoding used

    @param data     The leading bytes of the file
    @returns (encoding, bytes to skip)
             encoding is None when neither a BOM nor a declaration is found
    """
    # BOM marks are given priority over declarations
    bom_match = next(
        ((enc, len(bom)) for bom, enc in BOMS if data.startswith(bom)), None)
    if bom_match is not None:
        return bom_match

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    stripped = data.replace(b'\0', b'')
    # `re.match` anchors at the start of the data, so only a declaration
    # on the very first line is recognized
    declaration = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
    if declaration is None:
        return None, 0
    return declaration.group(1).decode(), 0
-												Allow users to specify encoding in each config files (#4357)

Authored by: Lesmiscore
											
										
										
											2022-07-15 11:52:14 +00:00
-												Allow multiple and nested configuration files

											
										
										
											2021-12-14 17:03:47 +00:00
class Config:
    """
    A list of command-line style arguments together with the nested
    configurations that those arguments pull in through `config_locations`

    own_args        The arguments this configuration was initialized with
    parsed_args     The arguments that get yielded by `all_args` for this node
    filename        The file the arguments were read from, if any
    configs         Child `Config` objects, in the order they were loaded
    """
    own_args = None
    parsed_args = None
    filename = None
    __initialized = False

    # Marks "no default given" for read_file, so that an explicitly passed
    # default (including None) is always handed back unchanged
    _NO_FILE_DEFAULT = object()

    def __init__(self, parser, label=None):
        self.parser, self.label = parser, label
        # `_loaded_paths` is shared with all children (see append_config)
        self._loaded_paths, self.configs = set(), []

    def init(self, args=None, filename=None):
        """Set the arguments/filename of this configuration and load it (once only)"""
        assert not self.__initialized
        self.own_args, self.filename = args, filename
        return self.load_configs()

    def load_configs(self):
        """
        Parse own_args and recursively load every config location found in them

        Relative locations are resolved against the directory of `filename`.
        @returns False if `filename` has already been loaded, True otherwise
        """
        directory = ''
        if self.filename:
            location = os.path.realpath(self.filename)
            directory = os.path.dirname(location)
            if location in self._loaded_paths:
                return False
            self._loaded_paths.add(location)

        self.__initialized = True
        opts, _ = self.parser.parse_known_args(self.own_args)
        self.parsed_args = self.own_args
        for location in opts.config_locations or []:
            if location == '-':
                # STDIN can only be consumed once
                if location in self._loaded_paths:
                    continue
                self._loaded_paths.add(location)
                self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
                continue
            location = os.path.join(directory, expand_path(location))
            if os.path.isdir(location):
                location = os.path.join(location, 'yt-dlp.conf')
            if not os.path.exists(location):
                self.parser.error(f'config location {location} does not exist')
            self.append_config(self.read_file(location), location)
        return True

    def __str__(self):
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')
        # Children are rendered below their parent, each line prefixed with "| "
        return join_nonempty(
            self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
            *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
            delim='\n')

    @staticmethod
    def read_file(filename, default=_NO_FILE_DEFAULT):
        """
        Read `filename` and split its contents into a list of arguments

        @param default  Returned as-is when the file cannot be opened.
                        If not given, a new empty list is returned instead
        @raises ValueError if the contents cannot be decoded or split
        """
        if default is Config._NO_FILE_DEFAULT:
            # A fresh list per call, so that callers can never end up sharing one
            default = []
        try:
            optionf = open(filename, 'rb')
        except OSError:
            return default  # silently skip if file is not present

        # `with` makes sure the file is closed no matter which step fails
        with optionf:
            try:
                enc, skip = determine_file_encoding(optionf.read(512))
                optionf.seek(skip, io.SEEK_SET)
            except OSError:
                enc = None  # silently skip read errors
            try:
                # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
                contents = optionf.read().decode(enc or preferredencoding())
                return shlex.split(contents, comments=True)
            except Exception as err:
                raise ValueError(f'Unable to parse "{filename}": {err}') from err

    @staticmethod
    def hide_login_info(opts):
        """Return a copy of `opts` with the values of all login options replaced by "PRIVATE" """
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

        def _scrub_eq(o):
            # Handles the "--option=value" form
            m = eqre.match(o)
            if m:
                return m.group('key') + '=PRIVATE'
            return o

        opts = list(map(_scrub_eq, opts))
        # Handles the "--option value" form
        for idx, opt in enumerate(opts):
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts

    def append_config(self, *args, label=None):
        """Create a child from `args` (passed on to `init`) and keep it if it got loaded"""
        config = type(self)(self.parser, label)
        config._loaded_paths = self._loaded_paths
        if config.init(*args):
            self.configs.append(config)

    @property
    def all_args(self):
        """The arguments of all children (last loaded first), followed by the own ones"""
        for config in reversed(self.configs):
            yield from config.all_args
        yield from self.parsed_args or []

    def parse_known_args(self, **kwargs):
        """Run the parser's `parse_known_args` on `all_args`"""
        return self.parser.parse_known_args(self.all_args, **kwargs)

    def parse_args(self):
        """Run the parser's `parse_args` on `all_args`"""
        return self.parser.parse_args(self.all_args)
-												[utils] WebSockets wrapper for non-async functions (#2417)

Authored by: Lesmiscore
											
										
										
											2022-02-13 05:58:21 +00:00
-												Implement `--add-header` without modifying `std_headers`

Closes #2526, #1614

											
										
										
											2022-01-28 21:55:35 +00:00
def merge_headers(*dicts):
    """
    Merge dicts of http headers case insensitively, prioritizing the latter ones

    Header names are normalized with `str.title`.
    """
    return {
        name.title(): value
        for headers in dicts
        for name, value in dict.items(headers)
    }
-												[downloader] Fix invocation of `HttpieFD`

Closes #3154

											
										
										
											2022-03-25 07:08:33 +00:00
-												[update] Ability to set a maximum version for specific variants

											
										
										
											2022-06-29 01:13:24 +00:00
def cached_method(f):
    """
    Cache a method

    Results are stored on the instance itself, in its
    `_cached_method__cache` attribute, per method name and per set of
    arguments (defaults applied, `self` left out).
    """
    sig = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        call = sig.bind(self, *args, **kwargs)
        call.apply_defaults()
        all_values = tuple(call.arguments.values())
        key = all_values[1:]  # the first value is the instance

        per_instance = vars(self).setdefault('_cached_method__cache', {})
        cache = per_instance.setdefault(f.__name__, {})
        if key in cache:
            return cache[key]
        result = cache[key] = f(self, *args, **kwargs)
        return result

    return wrapper
-												[downloader] Fix invocation of `HttpieFD`

Closes #3154

											
										
										
											2022-03-25 07:08:33 +00:00
class classproperty:
    """
    property access for class methods with optional caching

    Usable both as `@classproperty` and as `@classproperty(cache=True)`.
    With caching, the value is computed once per class it is accessed on.
    """

    def __new__(cls, func=None, *args, **kwargs):
        if func:
            return super().__new__(cls)
        # Called with options only: hand back a decorator awaiting the function
        return functools.partial(cls, *args, **kwargs)

    def __init__(self, func, *, cache=False):
        functools.update_wrapper(self, func)
        self.func = func
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        cache = self._cache
        if cache is None:
            return self.func(cls)
        if cls not in cache:
            cache[cls] = self.func(cls)
        return cache[cls]
-												[cleanup] Misc cleanup and refactor (#2173)

											
										
										
											2022-04-17 20:58:28 +00:00
-												[cleanup] Misc

Closes #5897

											
										
										
											2023-02-17 12:22:22 +00:00
class function_with_repr:
    """
    Callable wrapper that forwards calls to `func` and has a custom repr()

    @param repr_    The string to use as repr; if empty/None, the module
                    and qualified name of `func` are used
    """

    def __init__(self, func, repr_=None):
        functools.update_wrapper(self, func)
        self.func = func
        self.__repr = repr_

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    @classmethod
    def set_repr(cls, repr_):
        """Return a decorator that wraps a function using `repr_` as its repr"""
        return functools.partial(cls, repr_=repr_)

    def __repr__(self):
        custom = self.__repr
        if not custom:
            wrapped = self.func
            return f'{wrapped.__module__}.{wrapped.__qualname__}'
        return custom
-												[cleanup] Misc fixes (see desc)

* [tvver] Fix bug in 6837633a4a614920b6e43ffc6b4b8590dca8c9d7 - Closes #4054
* [rumble] Fix tests - Closes #3976
* [make] Remove `cat` abuse - Closes #3989
* [make] Revert #3684 - Closes #3814
* [utils] Improve `get_elements_by_class` - Closes #3993
* [utils] Inherit `Namespace` from `types.SimpleNamespace`
* [utils] Use `re.fullmatch` for matching filters
* [jsinterp] Handle quotes in `_separate`
* [make_readme] Allow overshooting last line

Authored by: pukkandan, kwconder, MrRawes, Lesmiscore

											
										
										
											2022-05-25 12:23:46 +00:00
class Namespace(types.SimpleNamespace):
    """
    `types.SimpleNamespace` that can additionally be iterated

    Iterating yields the attribute values; `items_` gives (name, value) pairs.
    """

    def __iter__(self):
        return iter(vars(self).values())

    @property
    def items_(self):
        return vars(self).items()
-												[dependencies] Create module with all dependency imports

											
										
										
											2022-04-20 19:05:57 +00:00
-												[utils, cleanup] Consolidate known media extensions

											
										
										
											2022-07-30 20:45:22 +00:00
# Known file extensions, grouped by the kind of media they hold
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
# The full `video`/`audio` groups also contain the corresponding `common_*` entries
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

# Flat tuple of every video, audio and manifest extension (in that order)
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
-												[core] Disallow unsafe extensions (CVE-2024-38519)

Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j

Authored by: Grub4K

											
										
										
											2024-07-01 22:52:50 +00:00
class _UnsafeExtensionError(Exception):
    """
    Mitigation exception for uncommon/malicious file extensions
    This should be caught in YoutubeDL.py alongside a warning
    Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j

    The rejected extension is available as the `extension` attribute.
    """
    # Allow-list consulted by `sanitize_extension`; entries are lowercase
    ALLOWED_EXTENSIONS = frozenset([
        # internal
        'description',
        'json',
        'meta',
        'orig',
        'part',
        'temp',
        'uncut',
        'unknown_video',
        'ytdl',

        # video
        *MEDIA_EXTENSIONS.video,
        'asx',
        'ismv',
        'm2t',
        'm2ts',
        'm2v',
        'm4s',
        'mng',
        'mp2v',
        'mp4v',
        'mpe',
        'mpeg',
        'mpeg1',
        'mpeg2',
        'mpeg4',
        'mxf',
        'ogm',
        'qt',
        'rm',
        'swf',
        'ts',
        'vob',
        'vp9',

        # audio
        *MEDIA_EXTENSIONS.audio,
        '3ga',
        'ac3',
        'adts',
        'aif',
        'au',
        'dts',
        'isma',
        'it',
        'mid',
        'mod',
        'mpga',
        'mp1',
        'mp2',
        'mp4a',
        'mpa',
        'ra',
        'shn',
        'xm',

        # image
        *MEDIA_EXTENSIONS.thumbnails,
        'avif',
        'bmp',
        'gif',
        'heic',
        'ico',
        'jng',
        'jpeg',
        'jxl',
        'svg',
        'tif',
        'tiff',
        'wbmp',

        # subtitle
        *MEDIA_EXTENSIONS.subtitles,
        'dfxp',
        'fs',
        'ismt',
        'json3',
        'sami',
        'scc',
        'srv1',
        'srv2',
        'srv3',
        'ssa',
        'tt',
        'ttml',
        'xml',

        # others
        *MEDIA_EXTENSIONS.manifests,
        *MEDIA_EXTENSIONS.storyboards,
        'desktop',
        'ism',
        'm3u',
        'sbv',
        'url',
        'webloc',
    ])

    def __init__(self, extension, /):
        """Record the offending `extension` and build the error message from it."""
        super().__init__(f'unsafe file extension: {extension!r}')
        self.extension = extension

    @classmethod
    def sanitize_extension(cls, extension, /, *, prepend=False):
        """Validate `extension` and return it, or raise this exception.

        @param extension    Extension to check; `None` is passed through unchanged
        @param prepend      If true, only path separators are rejected and the
                            allow-list check is skipped
        @returns            The extension to use. An extension whose last
                            dot-separated part is exactly 'bin' is replaced
                            as a whole by 'unknown_video'
        @raises             `cls(extension)` if the extension contains a path
                            separator or (unless `prepend`) its last part is
                            not in `ALLOWED_EXTENSIONS`
        """
        if extension is None:
            return None

        # A path separator would let the "extension" escape into another directory
        if '/' in extension or '\\' in extension:
            raise cls(extension)

        if not prepend:
            # Only the part after the final dot decides whether the extension is allowed
            _, _, last = extension.rpartition('.')

            if last == 'bin':
                extension = last = 'unknown_video'

            # The allow-list is lowercase, so compare case-insensitively
            if last.lower() not in cls.ALLOWED_EXTENSIONS:
                raise cls(extension)

        return extension
-												Standardize retry mechanism (#1649)

* [utils] Create `RetryManager`
* Migrate all retries to use the manager
* [extractor] Add wrapper methods for convenience
* Standardize console messages for retries
* Add `--retry-sleep` for extractors
											
										
										
											2022-08-01 20:13:18 +00:00
class RetryManager:
    """Usage:
        for retry in RetryManager(...):
            try:
                ...
            except SomeException as err:
                retry.error = err
                continue

    Each iteration yields the manager itself. The loop stops as soon as an
    iteration finishes without setting `retry.error`, or once more than
    `retries` attempts have failed. After every failed attempt the error
    callback is called as `callback(error, attempt, retries, **kwargs)`.
    """
    # `_error` starts as None (rather than NO_DEFAULT) so that the first attempt is always made
    attempt, _error = 0, None

    def __init__(self, _retries, _error_callback, **kwargs):
        """
        @param _retries         Number of retries allowed after the first attempt;
                                a falsy value means no retries
        @param _error_callback  Callable invoked after each failed attempt
        @param kwargs           Extra keyword arguments bound to the callback
        """
        self.retries = _retries or 0
        self.error_callback = functools.partial(_error_callback, **kwargs)

    def _should_retry(self):
        """Whether another attempt is due: the last one failed and the budget is not used up."""
        return self._error is not NO_DEFAULT and self.attempt <= self.retries

    @property
    def error(self):
        """Error set during the current attempt, or None if there is none."""
        if self._error is NO_DEFAULT:
            return None
        return self._error

    @error.setter
    def error(self, value):
        self._error = value

    def __iter__(self):
        while self._should_retry():
            # NO_DEFAULT marks "no error reported yet" for the upcoming attempt
            self.error = NO_DEFAULT
            self.attempt += 1
            yield self
            if self.error:
                self.error_callback(self.error, self.attempt, self.retries)

    @staticmethod
    def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
        """Utility function for reporting retries

        @param e            The error of the failed attempt
        @param count        Number of the attempt that just failed
        @param retries      Number of retries allowed
        @param sleep_func   Delay before the next attempt: either a value, or a
                            callable invoked as `sleep_func(n=count - 1)`
        @param info         Callable used to report the sleep
        @param warn         Callable used to report that a retry will happen
        @param error        Optional callable used to report giving up. Its
                            return value is returned. If not given, `e` is raised
        @param suffix       Optional text inserted after "Retrying" in the warning
        """
        if count > retries:
            # Out of retries: report through `error` if possible, otherwise propagate
            if error:
                return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
            raise e

        if not count:
            return warn(e)
        elif isinstance(e, ExtractorError):
            # Use the bare message without a trailing period, since one is added below
            e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
        warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')

        delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
        if delay:
            info(f'Sleeping {delay:.2f} seconds ...')
            time.sleep(delay)
-												Minor bugfixes

											
										
										
											2022-08-01 22:10:47 +00:00
def make_archive_id(ie, video_id):
    """Build the archive entry "<lowercased extractor key> <video id>".

    @param ie        Extractor key as a string, or any object providing `ie_key()`
    @param video_id  ID of the video; formatted into the result as-is
    """
    ie_key = ie if isinstance(ie, str) else ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
-												[jsinterp] Truncate error messages

Related: #4635

											
										
										
											2022-08-12 13:23:53 +00:00
def truncate_string(s, left, right=0):
    """Shorten `s` to at most `left + right` characters, marking the cut with "...".

    @param s      String to shorten; `None` is returned unchanged
    @param left   Length of the leading part, including the 3-character "..."
                  (must be greater than 3)
    @param right  Number of trailing characters to keep (must not be negative)
    @returns      `s` itself if it is already short enough, else the shortened string
    """
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        return s
    # `s[-0:]` would be the whole string, hence the explicit check on `right`
    return f'{s[:left - 3]}...{s[-right:] if right else ""}'
-												[jsinterp] Truncate error messages

Related: #4635

											
										
										
											2022-08-12 13:23:53 +00:00
-												[utils] Add orderedSet_from_options

											
										
										
											2022-08-24 02:08:55 +00:00
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
    """Resolve a sequence of add/remove options into an ordered, de-duplicated collection.

    Every option either adds items or, when prefixed with "-", removes them.
    An option naming a key of `alias_dict` is expanded to that alias' members;
    removing an alias flips the sign of each of its members.

    @param options     Iterable of option strings
    @param alias_dict  Mapping of alias name to a list of options. Must have
                       an "all" key, which lists every valid item
    @param use_regex   If true, non-alias options are treated as case-insensitive
                       patterns that must fully match an item of "all". In this
                       mode a pattern that matches nothing is silently ignored
    @param start       Optional initial items
    @raises ValueError if `use_regex` is false and an option is neither an
                       alias nor an item of "all"
    """
    assert 'all' in alias_dict, '"all" alias is required'
    requested = list(start or [])
    for val in options:
        discard = val.startswith('-')
        if discard:
            val = val[1:]

        if val in alias_dict:
            val = alias_dict[val] if not discard else [
                i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
            # NB: Do not allow regex in aliases for performance
            requested = orderedSet_from_options(val, alias_dict, start=requested)
            continue

        # In regex mode this is a lazy iterator and therefore never None
        current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
                   else [val] if val in alias_dict['all'] else None)
        if current is None:
            raise ValueError(val)

        if discard:
            for item in current:
                # Remove every occurrence, not just the first one
                while item in requested:
                    requested.remove(item)
        else:
            requested.extend(current)

    return orderedSet(requested)
-												[utils] `FormatSorter`: Improve `size` and `br`

Closes #1596

Previously, when some formats had an accurate size and others only an approximate one,
the ones with an accurate size were always prioritized

For formats with known tbr and unknown vbr/abr, we were setting
(vbr=tbr, abr=0) for sorting to work. This is no longer needed.

Authored by pukkandan, u-spec-png

											
										
										
											2023-06-19 08:36:39 +00:00
+								# TODO: Rewrite
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								class FormatSorter:
 								    regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'
 								    default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
 								               'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
 								               'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')  # These must not be aliases
 								    ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
 								                    'height', 'width', 'proto', 'vext', 'abr', 'aext',
 								                    'fps', 'fs_approx', 'source', 'id')
 								    settings = {
 								        'vcodec': {'type': 'ordered', 'regex': True,
 								                   'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
 								        'acodec': {'type': 'ordered', 'regex': True,
-												Add `ac4` to known codecs

Note: ffmpeg does not currently support this format

Related #5738

											
										
										
											2022-12-09 09:47:16 +00:00
+								                   'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								        'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
 								                'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
 								        'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
 								                  'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
 								        'vext': {'type': 'ordered', 'field': 'video_ext',
-												[FormatSort] Add `mov` to `vext`

Closes #5581

											
										
										
											2022-11-18 06:01:15 +00:00
+								                 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
 								                 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
-												Add `weba` to known extensions

											
										
										
											2022-12-30 10:00:56 +00:00
+								        'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
 								                 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
 								                 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								        'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
 								        'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
 								                       'field': ('vcodec', 'acodec'),
 								                       'function': lambda it: int(any(v != 'none' for v in it))},
 								        'ie_pref': {'priority': True, 'type': 'extractor'},
 								        'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
 								        'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
 								        'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
 								        'quality': {'convert': 'float', 'default': -1},
 								        'filesize': {'convert': 'bytes'},
 								        'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
 								        'id': {'convert': 'string', 'field': 'format_id'},
 								        'height': {'convert': 'float_none'},
 								        'width': {'convert': 'float_none'},
 								        'fps': {'convert': 'float_none'},
 								        'channels': {'convert': 'float_none', 'field': 'audio_channels'},
 								        'tbr': {'convert': 'float_none'},
 								        'vbr': {'convert': 'float_none'},
 								        'abr': {'convert': 'float_none'},
 								        'asr': {'convert': 'float_none'},
 								        'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
 								        'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
-												[cleanup] Misc

											
										
										
											2023-06-22 04:32:38 +00:00
+								        'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
-												[utils] `FormatSorter`: Improve `size` and `br`

Closes #1596

Previously, when some formats had an accurate size and others only an approximate one,
the ones with an accurate size were always prioritized

For formats with known tbr and unknown vbr/abr, we were setting
(vbr=tbr, abr=0) for sorting to work. This is no longer needed.

Authored by pukkandan, u-spec-png

											
										
										
											2023-06-19 08:36:39 +00:00
+								               'function': lambda it: next(filter(None, it), None)},
-												[cleanup] Misc

											
										
										
											2023-06-22 04:32:38 +00:00
+								        'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
-												[utils] `FormatSorter`: Improve `size` and `br`

Closes #1596

Previously, when some formats had an accurate size and others only an approximate one,
the ones with an accurate size were always prioritized

For formats with known tbr and unknown vbr/abr, we were setting
(vbr=tbr, abr=0) for sorting to work. This is no longer needed.

Authored by pukkandan, u-spec-png

											
										
										
											2023-06-19 08:36:39 +00:00
+								                 'function': lambda it: next(filter(None, it), None)},
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								        'ext': {'type': 'combined', 'field': ('vext', 'aext')},
 								        'res': {'type': 'multiple', 'field': ('height', 'width'),
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                'function': lambda it: min(filter(None, it), default=0)},
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
 								        # Actual field names
 								        'format_id': {'type': 'alias', 'field': 'id'},
 								        'preference': {'type': 'alias', 'field': 'ie_pref'},
 								        'language_preference': {'type': 'alias', 'field': 'lang'},
 								        'source_preference': {'type': 'alias', 'field': 'source'},
 								        'protocol': {'type': 'alias', 'field': 'proto'},
 								        'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
 								        'audio_channels': {'type': 'alias', 'field': 'channels'},
 								        # Deprecated
 								        'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
 								        'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
 								        'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
 								        'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
 								        'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
 								        'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
 								        'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
 								        'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
 								        'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
 								        'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
 								        'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
 								        'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
 								        'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
 								        'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
 								        'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
 								        'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
 								        'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
 								        'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
 								        'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
 								        'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
 								    }
 								    def __init__(self, ydl, field_preference):
 								        self.ydl = ydl
 								        self._order = []
 								        self.evaluate_params(self.ydl.params, field_preference)
 								        if ydl.params.get('verbose'):
 								            self.print_verbose_info(self.ydl.write_debug)
 								    def _get_field_setting(self, field, key):
 								        if field not in self.settings:
 								            if key in ('forced', 'priority'):
 								                return False
 								            self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
 								                                        'deprecated and may be removed in a future version')
 								            self.settings[field] = {}
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        prop_obj = self.settings[field]
 								        if key not in prop_obj:
 								            type_ = prop_obj.get('type')
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								            if key == 'field':
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                default = 'preference' if type_ == 'extractor' else (field,) if type_ in ('combined', 'multiple') else field
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								            elif key == 'convert':
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                default = 'order' if type_ == 'ordered' else 'float_string' if field else 'ignore'
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								            else:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								                default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key)
 								            prop_obj[key] = default
 								        return prop_obj[key]
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    def _resolve_field_value(self, field, value, convert_none=False):
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								        if value is None:
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								            if not convert_none:
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								                return None
 								        else:
 								            value = value.lower()
 								        conversion = self._get_field_setting(field, 'convert')
 								        if conversion == 'ignore':
 								            return None
 								        if conversion == 'string':
 								            return value
 								        elif conversion == 'float_none':
 								            return float_or_none(value)
 								        elif conversion == 'bytes':
 								            return parse_bytes(value)
 								        elif conversion == 'order':
 								            order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
 								            use_regex = self._get_field_setting(field, 'regex')
 								            list_length = len(order_list)
 								            empty_pos = order_list.index('') if '' in order_list else list_length + 1
 								            if use_regex and value is not None:
 								                for i, regex in enumerate(order_list):
 								                    if regex and re.match(regex, value):
 								                        return list_length - i
 								                return list_length - empty_pos  # not in list
 								            else:  # not regex or  value = None
 								                return list_length - (order_list.index(value) if value in order_list else empty_pos)
 								        else:
 								            if value.isnumeric():
 								                return float(value)
 								            else:
 								                self.settings[field]['convert'] = 'string'
 								                return value
 								    def evaluate_params(self, params, sort_extractor):
 								        """
 								        Build `self._order` and the per-field sort settings from all sort specifications
 								        Specifications are processed in this order: default fields marked 'forced',
 								        default fields marked 'priority' (skipped if `format_sort_force` is set),
 								        `params['format_sort']`, `sort_extractor`, then all of `self.default`.
 								        Only the first occurrence of each field takes effect.
 								        @param params          Mapping read for 'prefer_free_formats', 'format_sort' and 'format_sort_force'
 								        @param sort_extractor  Iterable of sort strings
 								        @raises ExtractorError if a sort string does not match `self.regex`
 								        """
 								        self._use_free_order = params.get('prefer_free_formats', False)
 								        self._sort_user = params.get('format_sort', [])
 								        self._sort_extractor = sort_extractor
 								        def add_item(field, reverse, closest, limit_text):
 								            field = field.lower()
 								            if field in self._order:
 								                return
 								            self._order.append(field)
 								            limit = self._resolve_field_value(field, limit_text)
 								            data = {
 								                'reverse': reverse,
 								                'closest': False if limit is None else closest,
 								                'limit_text': limit_text,
 								                'limit': limit}
 								            if field in self.settings:
 								                self.settings[field].update(data)
 								            else:
 								                self.settings[field] = data
 								        sort_list = (
 								            tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
 								            + (() if params.get('format_sort_force', False)
 								                else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
 								            + tuple(self._sort_user) + tuple(sort_extractor) + self.default)
 								        for item in sort_list:
 								            match = re.match(self.regex, item)
 								            if match is None:
 								                raise ExtractorError(f'Invalid format sort string "{item}" given by extractor')
 								            field = match.group('field')
 								            if field is None:
 								                continue
 								            if self._get_field_setting(field, 'type') == 'alias':
 								                alias, field = field, self._get_field_setting(field, 'field')
 								                if self._get_field_setting(alias, 'deprecated'):
 								                    self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
 								                                                f'be removed in a future version. Please use {field} instead')
 								            reverse = match.group('reverse') is not None
 								            closest = match.group('separator') == '~'
 								            limit_text = match.group('limit')
 								            has_limit = limit_text is not None
 								            has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
 								            has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
 								            fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
 								            limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else ()
 								            limit_count = len(limits)
 								            for i, f in enumerate(fields):
 								                # A single limit is shared by all sub-fields; missing per-field limits become None
 								                add_item(f, reverse, closest,
 								                         limits[i] if i < limit_count
 								                         else limits[0] if has_limit and not has_multiple_limits
 								                         else None)
 								    def print_verbose_info(self, write_debug):
 								        """
 								        Report the user-given, extractor-given and effective sort order
 								        @param write_debug  Callable taking one message string
 								        """
 								        if self._sort_user:
 								            write_debug('Sort order given by user: {}'.format(', '.join(self._sort_user)))
 								        if self._sort_extractor:
 								            write_debug('Sort order given by extractor: {}'.format(', '.join(self._sort_extractor)))
 								        def describe(field):
 								            # Rendered as "[+]field" or "[+]field<sep>limit_text(limit)", <sep> being '~' for closest else ':'
 								            prefix = '+' if self._get_field_setting(field, 'reverse') else ''
 								            limit_text = self._get_field_setting(field, 'limit_text')
 								            if limit_text is None:
 								                return f'{prefix}{field}'
 								            separator = '~' if self._get_field_setting(field, 'closest') else ':'
 								            limit = self._get_field_setting(field, 'limit')
 								            return f'{prefix}{field}{separator}{limit_text}({limit})'
 								        visible_fields = [field for field in self._order if self._get_field_setting(field, 'visible')]
 								        write_debug('Formats sorted by: {}'.format(', '.join(map(describe, visible_fields))))
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    def _calculate_field_preference_from_value(self, format_, field, type_, value):
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								        reverse = self._get_field_setting(field, 'reverse')
 								        closest = self._get_field_setting(field, 'closest')
 								        limit = self._get_field_setting(field, 'limit')
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        if type_ == 'extractor':
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								            maximum = self._get_field_setting(field, 'max')
 								            if value is None or (maximum is not None and value >= maximum):
 								                value = -1
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        elif type_ == 'boolean':
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								            in_list = self._get_field_setting(field, 'in_list')
 								            not_in_list = self._get_field_setting(field, 'not_in_list')
 								            value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        elif type_ == 'ordered':
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								            value = self._resolve_field_value(field, value, True)
 								        # try to convert to number
 								        val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
 								        is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
 								        if is_num:
 								            value = val_num
 								        return ((-10, 0) if value is None
 								                else (1, value, 0) if not is_num  # if a field has mixed strings and numbers, strings are sorted higher
 								                else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
 								                else (0, value, 0) if not reverse and (limit is None or value <= limit)
 								                else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
 								                else (-1, value, 0))
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								    def _calculate_field_preference(self, format_, field):
 								        type_ = self._get_field_setting(field, 'type')  # extractor, boolean, ordered, field, multiple
 								        get_value = lambda f: format_.get(self._get_field_setting(f, 'field'))
 								        if type_ == 'multiple':
 								            type_ = 'field'  # Only 'field' is allowed in multiple for now
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
+								            actual_fields = self._get_field_setting(field, 'field')
 								            value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
 								        else:
 								            value = get_value(field)
-												[cleanup] Add more ruff rules (#10149)

Authored by: seproDev

Reviewed-by: bashonly <88596187+bashonly@users.noreply.github.com>
Reviewed-by: Simon Sawicki <contact@grub4k.xyz>
											
										
										
											2024-06-11 23:09:58 +00:00
+								        return self._calculate_field_preference_from_value(format_, field, type_, value)
-												[utils] Move format sorting code into `utils`

											
										
										
											2022-11-17 05:33:20 +00:00
 								    def calculate_preference(self, format):
 								        """
 								        Fill in missing sort-relevant keys of `format` and return its sort key
 								        NB: `format` is modified in place ('protocol', 'ext', 'video_ext', 'audio_ext',
 								        'preference', 'vbr', 'abr' and 'tbr' may be set)
 								        @param format  Format dict
 								        @returns       Tuple with one `_calculate_field_preference` result per field in `self._order`
 								        """
 								        # Determine missing protocol
 								        if not format.get('protocol'):
 								            format['protocol'] = determine_protocol(format)
 								        # Determine missing ext
 								        if not format.get('ext') and 'url' in format:
 								            format['ext'] = determine_ext(format['url'])
 								        if format.get('vcodec') == 'none':
 								            format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
 								            format['video_ext'] = 'none'
 								        else:
 								            format['video_ext'] = format['ext']
 								            format['audio_ext'] = 'none'
 								        # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'):  # Not supported?
 								        #    format['preference'] = -1000
 								        if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''):
 								            # HEVC-over-FLV is out-of-spec by FLV's original spec
 								            # ref. https://trac.ffmpeg.org/ticket/6389
 								            # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
 								            format['preference'] = -100
 								        # Determine missing bitrates
 								        if format.get('vcodec') == 'none':
 								            format['vbr'] = 0
 								        if format.get('acodec') == 'none':
 								            format['abr'] = 0
 								        # Derive whichever of vbr/abr/tbr is missing from the other two, where possible
 								        if not format.get('vbr') and format.get('vcodec') != 'none':
 								            format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
 								        if not format.get('abr') and format.get('acodec') != 'none':
 								            format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
 								        if not format.get('tbr'):
 								            format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
 								        return tuple(self._calculate_field_preference(format, field) for field in self._order)
-												[utils] Add temporary shim for logging

Related: #5680, #7517

											
										
										
											2023-07-15 06:11:08 +00:00
-												[core] Fix `filesize_approx` calculation (#9560)

Reverts 22e4dfacb61f62dfbb3eb41b31c7b69ba1059b80

Despite being documented as `Kbit/s`, the extractors/manifests were returning bitrates in SI units of kilobits/sec.

Authored by: seproDev, pukkandan
											
										
										
											2024-03-31 23:17:24 +00:00
+								def filesize_from_tbr(tbr, duration):
 								    """
 								    @param tbr:      Total bitrate in kbps (1000 bits/sec)
 								    @param duration: Duration in seconds
 								    @returns         Filesize in bytes
 								    """
 								    if tbr is None or duration is None:
 								        return None
 								    return int(duration * tbr * (1000 / 8))
-												[utils] Add temporary shim for logging

Related: #5680, #7517

											
										
										
											2023-07-15 06:11:08 +00:00
+								# XXX: Temporary
 								class _YDLLogger:
 								    def __init__(self, ydl=None):
 								        self._ydl = ydl
 								    def debug(self, message):
 								        if self._ydl:
 								            self._ydl.write_debug(message)
 								    def info(self, message):
 								        if self._ydl:
 								            self._ydl.to_screen(message)
 								    def warning(self, message, *, once=False):
 								        if self._ydl:
-												[compat, networking] Deprecate old functions (#2861)

Authored by: coletdjnz, pukkandan

											
										
										
											2023-07-09 07:53:02 +00:00
+								            self._ydl.report_warning(message, once)
-												[utils] Add temporary shim for logging

Related: #5680, #7517

											
										
										
											2023-07-15 06:11:08 +00:00
 								    def error(self, message, *, is_error=True):
 								        if self._ydl:
 								            self._ydl.report_error(message, is_error=is_error)
 								    def stdout(self, message):
 								        if self._ydl:
 								            self._ydl.to_stdout(message)
 								    def stderr(self, message):
 								        if self._ydl:
 								            self._ydl.to_stderr(message)