mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2024-12-13 14:52:35 +00:00
[outtmpl] Add alternate forms `F`, `D` and improve `id` detection
`F` = sanitize as filename (`#` = restricted); `D` = add Decimal suffixes
Closes #2085, #2081
This commit is contained in:
parent
4ac5b94807
commit
e0fd95737d
4 changed files with 47 additions and 29 deletions
|
@ -1085,7 +1085,7 @@ The field names themselves (the part inside the parenthesis) can also have some
|
||||||
|
|
||||||
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
|
1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
|
||||||
|
|
||||||
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma separated **l**ist (flag `#` for `\n` newline-separated) and a string **q**uoted for the terminal (flag `#` to split a list into different arguments), respectively
|
1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q`, `D`, `F` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma separated **l**ist (flag `#` for `\n` newline-separated), a string **q**uoted for the terminal (flag `#` to split a list into different arguments), to add **D**ecimal suffixes (Eg: 10M), and to sanitize as **F**ilename (flag `#` for restricted), respectively
|
||||||
|
|
||||||
1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC
|
1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC
|
||||||
|
|
||||||
|
|
|
@ -717,6 +717,7 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
test('%(id)s', '.abcd', info={'id': '.abcd'})
|
test('%(id)s', '.abcd', info={'id': '.abcd'})
|
||||||
test('%(id)s', 'ab__cd', info={'id': 'ab__cd'})
|
test('%(id)s', 'ab__cd', info={'id': 'ab__cd'})
|
||||||
test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'})
|
test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'})
|
||||||
|
test('%(id.0)s', '-', info={'id': '--'})
|
||||||
|
|
||||||
# Invalid templates
|
# Invalid templates
|
||||||
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
|
self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
|
||||||
|
@ -777,6 +778,10 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
|
test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
|
||||||
test('%(title5)+U', 'áéí A')
|
test('%(title5)+U', 'áéí A')
|
||||||
test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A')
|
test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A')
|
||||||
|
test('%(height)D', '1K')
|
||||||
|
test('%(height)5.2D', ' 1.08K')
|
||||||
|
test('%(title4).10F', ('foo \'bar\' ', 'foo \'bar\'#'))
|
||||||
|
test('%(title4)#F', 'foo_bar_test')
|
||||||
if compat_os_name == 'nt':
|
if compat_os_name == 'nt':
|
||||||
test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'"))
|
test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'"))
|
||||||
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', "'id 1' 'id 2' 'id 3'"))
|
test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', "'id 1' 'id 2' 'id 3'"))
|
||||||
|
@ -808,6 +813,11 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
test('%(width-100,height+width|def)s', 'def')
|
test('%(width-100,height+width|def)s', 'def')
|
||||||
test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00')
|
test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00')
|
||||||
|
|
||||||
|
# Replacement
|
||||||
|
test('%(id&foo)s.bar', 'foo.bar')
|
||||||
|
test('%(title&foo)s.bar', 'NA.bar')
|
||||||
|
test('%(title&foo|baz)s.bar', 'baz.bar')
|
||||||
|
|
||||||
# Laziness
|
# Laziness
|
||||||
def gen():
|
def gen():
|
||||||
yield from range(5)
|
yield from range(5)
|
||||||
|
@ -836,11 +846,6 @@ class TestYoutubeDL(unittest.TestCase):
|
||||||
test('%(title3)s', ('foo/bar\\test', 'foo_bar_test'))
|
test('%(title3)s', ('foo/bar\\test', 'foo_bar_test'))
|
||||||
test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo_bar_test' % os.path.sep))
|
test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo_bar_test' % os.path.sep))
|
||||||
|
|
||||||
# Replacement
|
|
||||||
test('%(id&foo)s.bar', 'foo.bar')
|
|
||||||
test('%(title&foo)s.bar', 'NA.bar')
|
|
||||||
test('%(title&foo|baz)s.bar', 'baz.bar')
|
|
||||||
|
|
||||||
def test_format_note(self):
|
def test_format_note(self):
|
||||||
ydl = YoutubeDL()
|
ydl = YoutubeDL()
|
||||||
self.assertEqual(ydl._format_note({}), '')
|
self.assertEqual(ydl._format_note({}), '')
|
||||||
|
|
|
@ -67,6 +67,7 @@ from .utils import (
|
||||||
float_or_none,
|
float_or_none,
|
||||||
format_bytes,
|
format_bytes,
|
||||||
format_field,
|
format_field,
|
||||||
|
format_decimal_suffix,
|
||||||
formatSeconds,
|
formatSeconds,
|
||||||
GeoRestrictedError,
|
GeoRestrictedError,
|
||||||
get_domain,
|
get_domain,
|
||||||
|
@ -1005,7 +1006,7 @@ class YoutubeDL(object):
|
||||||
def validate_outtmpl(cls, outtmpl):
|
def validate_outtmpl(cls, outtmpl):
|
||||||
''' @return None or Exception object '''
|
''' @return None or Exception object '''
|
||||||
outtmpl = re.sub(
|
outtmpl = re.sub(
|
||||||
STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
|
STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDF]'),
|
||||||
lambda mobj: f'{mobj.group(0)[:-1]}s',
|
lambda mobj: f'{mobj.group(0)[:-1]}s',
|
||||||
cls._outtmpl_expandpath(outtmpl))
|
cls._outtmpl_expandpath(outtmpl))
|
||||||
try:
|
try:
|
||||||
|
@ -1021,8 +1022,12 @@ class YoutubeDL(object):
|
||||||
info_dict.pop(key, None)
|
info_dict.pop(key, None)
|
||||||
return info_dict
|
return info_dict
|
||||||
|
|
||||||
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
|
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
|
||||||
""" Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
|
""" Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
|
||||||
|
@param sanitize Whether to sanitize the output as a filename.
|
||||||
|
For backward compatibility, a function can also be passed
|
||||||
|
"""
|
||||||
|
|
||||||
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
|
info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set
|
||||||
|
|
||||||
info_dict = self._copy_infodict(info_dict)
|
info_dict = self._copy_infodict(info_dict)
|
||||||
|
@ -1043,7 +1048,7 @@ class YoutubeDL(object):
|
||||||
}
|
}
|
||||||
|
|
||||||
TMPL_DICT = {}
|
TMPL_DICT = {}
|
||||||
EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
|
EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDF]'))
|
||||||
MATH_FUNCTIONS = {
|
MATH_FUNCTIONS = {
|
||||||
'+': float.__add__,
|
'+': float.__add__,
|
||||||
'-': float.__sub__,
|
'-': float.__sub__,
|
||||||
|
@ -1051,7 +1056,7 @@ class YoutubeDL(object):
|
||||||
# Field is of the form key1.key2...
|
# Field is of the form key1.key2...
|
||||||
# where keys (except first) can be string, int or slice
|
# where keys (except first) can be string, int or slice
|
||||||
FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
|
FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
|
||||||
MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
|
MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
|
||||||
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
|
MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
|
||||||
INTERNAL_FORMAT_RE = re.compile(r'''(?x)
|
INTERNAL_FORMAT_RE = re.compile(r'''(?x)
|
||||||
(?P<negate>-)?
|
(?P<negate>-)?
|
||||||
|
@ -1107,6 +1112,13 @@ class YoutubeDL(object):
|
||||||
|
|
||||||
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
na = self.params.get('outtmpl_na_placeholder', 'NA')
|
||||||
|
|
||||||
|
def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
|
||||||
|
return sanitize_filename(str(value), restricted=restricted,
|
||||||
|
is_id=re.search(r'(^|[_.])id(\.|$)', key))
|
||||||
|
|
||||||
|
sanitizer = sanitize if callable(sanitize) else filename_sanitizer
|
||||||
|
sanitize = bool(sanitize)
|
||||||
|
|
||||||
def _dumpjson_default(obj):
|
def _dumpjson_default(obj):
|
||||||
if isinstance(obj, (set, LazyList)):
|
if isinstance(obj, (set, LazyList)):
|
||||||
return list(obj)
|
return list(obj)
|
||||||
|
@ -1117,7 +1129,7 @@ class YoutubeDL(object):
|
||||||
return outer_mobj.group(0)
|
return outer_mobj.group(0)
|
||||||
key = outer_mobj.group('key')
|
key = outer_mobj.group('key')
|
||||||
mobj = re.match(INTERNAL_FORMAT_RE, key)
|
mobj = re.match(INTERNAL_FORMAT_RE, key)
|
||||||
initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
|
initial_field = mobj.group('fields') if mobj else ''
|
||||||
value, replacement, default = None, None, na
|
value, replacement, default = None, None, na
|
||||||
while mobj:
|
while mobj:
|
||||||
mobj = mobj.groupdict()
|
mobj = mobj.groupdict()
|
||||||
|
@ -1153,6 +1165,10 @@ class YoutubeDL(object):
|
||||||
# "+" = compatibility equivalence, "#" = NFD
|
# "+" = compatibility equivalence, "#" = NFD
|
||||||
'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
|
'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
|
||||||
value), str_fmt
|
value), str_fmt
|
||||||
|
elif fmt[-1] == 'D': # decimal suffix
|
||||||
|
value, fmt = format_decimal_suffix(value, f'%{fmt[:-1]}f%s' if fmt[:-1] else '%d%s'), 's'
|
||||||
|
elif fmt[-1] == 'F': # filename sanitization
|
||||||
|
value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
|
||||||
elif fmt[-1] == 'c':
|
elif fmt[-1] == 'c':
|
||||||
if value:
|
if value:
|
||||||
value = str(value)[0]
|
value = str(value)[0]
|
||||||
|
@ -1169,7 +1185,7 @@ class YoutubeDL(object):
|
||||||
# So we convert it to repr first
|
# So we convert it to repr first
|
||||||
value, fmt = repr(value), str_fmt
|
value, fmt = repr(value), str_fmt
|
||||||
if fmt[-1] in 'csr':
|
if fmt[-1] in 'csr':
|
||||||
value = sanitize(initial_field, value)
|
value = sanitizer(initial_field, value)
|
||||||
|
|
||||||
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
|
key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
|
||||||
TMPL_DICT[key] = value
|
TMPL_DICT[key] = value
|
||||||
|
@ -1183,12 +1199,8 @@ class YoutubeDL(object):
|
||||||
|
|
||||||
def _prepare_filename(self, info_dict, tmpl_type='default'):
|
def _prepare_filename(self, info_dict, tmpl_type='default'):
|
||||||
try:
|
try:
|
||||||
sanitize = lambda k, v: sanitize_filename(
|
|
||||||
compat_str(v),
|
|
||||||
restricted=self.params.get('restrictfilenames'),
|
|
||||||
is_id=(k == 'id' or k.endswith('_id')))
|
|
||||||
outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
|
outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
|
||||||
filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
|
filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
|
||||||
|
|
||||||
force_ext = OUTTMPL_TYPES.get(tmpl_type)
|
force_ext = OUTTMPL_TYPES.get(tmpl_type)
|
||||||
if filename and force_ext is not None:
|
if filename and force_ext is not None:
|
||||||
|
|
|
@ -2110,18 +2110,19 @@ def unsmuggle_url(smug_url, default=None):
|
||||||
return url, data
|
return url, data
|
||||||
|
|
||||||
|
|
||||||
|
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """ Formats numbers with decimal suffixes like K, M, etc

    @param num      The number to format; it is first passed through float_or_none
    @param fmt      %-style template that receives (scaled value, suffix)
    @param factor   Ratio between two successive suffixes (Eg: 1000, 1024)
    @return         The formatted string, or None if float_or_none gives None
                    or the number is negative
    """
    num, factor = float_or_none(num), float(factor)
    # math.log is undefined for negative numbers, so treat them as unusable input
    if num is None or num < 0:
        return None
    suffixes = ['', *'KMGTPEZY']
    # Clamp the exponent to the table: a value below 1 can give a negative
    # exponent (which would index from the end of the list), and a value
    # beyond the last suffix would raise IndexError
    exponent = 0 if num == 0 else min(max(int(math.log(num, factor)), 0), len(suffixes) - 1)
    converted = num / (factor ** exponent)
    return fmt % (converted, suffixes[exponent])
|
||||||
|
|
||||||
|
|
||||||
def format_bytes(bytes):
|
def format_bytes(bytes):
|
||||||
if bytes is None:
|
return format_decimal_suffix(bytes, '%.2f%siB', factor=1024) or 'N/A'
|
||||||
return 'N/A'
|
|
||||||
if type(bytes) is str:
|
|
||||||
bytes = float(bytes)
|
|
||||||
if bytes == 0.0:
|
|
||||||
exponent = 0
|
|
||||||
else:
|
|
||||||
exponent = int(math.log(bytes, 1024.0))
|
|
||||||
suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
|
|
||||||
converted = float(bytes) / float(1024 ** exponent)
|
|
||||||
return '%.2f%s' % (converted, suffix)
|
|
||||||
|
|
||||||
|
|
||||||
def lookup_unit_table(unit_table, s):
|
def lookup_unit_table(unit_table, s):
|
||||||
|
|
Loading…
Reference in a new issue