mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-01-07 10:49:05 +00:00
Improve output template internal formatting
* Allow slicing lists/strings using `field.start:end:step`
* A field can also be used as offset like `field1+num+field2`
* A default value can be given using `field|default`
* Capture all format strings and set their value to `None` if invalid. This prevents invalid fields from causing errors
This commit is contained in:
parent
12e73423f1
commit
e625be0d10
4 changed files with 71 additions and 32 deletions
|
@ -842,13 +842,14 @@ The simplest usage of `-o` is not to set any template arguments when downloading
|
||||||
It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
|
It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [python string formatting operations](https://docs.python.org/2/library/stdtypes.html#string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.
|
||||||
|
|
||||||
The field names themselves (the part inside the parentheses) can also have some special formatting:
|
The field names themselves (the part inside the parentheses) can also have some special formatting:
|
||||||
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s` or `%(upload_date>%Y-%m-%d)s`
|
1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do Python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields
|
||||||
2. **Offset numbers**: Numeric fields can have an initial offset specified by using a `+` separator. Eg: `%(playlist_index+10)03d`. This can also be used in conjunction with the date-time formatting. Eg: `%(epoch+-3600>%H-%M-%S)s`
|
1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
|
||||||
3. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. Eg: `%(tags.0)s` or `%(subtitles.en.-1.ext)`. Note that the fields that become available using this method are not listed below. Use `-j` to see such fields
|
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`
|
||||||
|
1. **Default**: A default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
|
||||||
|
|
||||||
To summarize, the general syntax for a field is:
|
To summarize, the general syntax for a field is:
|
||||||
```
|
```
|
||||||
%(name[.keys][+offset][>strf])[flags][width][.precision][length]type
|
%(name[.keys][addition][>strf][|default])[flags][width][.precision][length]type
|
||||||
```
|
```
|
||||||
|
|
||||||
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
|
Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation`, `infojson`, `pl_description`, `pl_infojson`, `chapter`. For example, `-o '%(title)s.%(ext)s' -o 'thumbnail:%(title)s\%(title)s.%(ext)s'` will put the thumbnails in a folder with the same name as the video.
|
||||||
|
|
|
@ -843,29 +843,67 @@ class YoutubeDL(object):
|
||||||
if sanitize is None:
|
if sanitize is None:
|
||||||
sanitize = lambda k, v: v
|
sanitize = lambda k, v: v
|
||||||
|
|
||||||
# Internal Formatting = name.key1.key2+number>strf
|
EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
|
||||||
INTERNAL_FORMAT_RE = FORMAT_RE.format(
|
# Field is of the form key1.key2...
|
||||||
r'''(?P<final_key>
|
# where keys (except first) can be string, int or slice
|
||||||
(?P<fields>\w+(?:\.[-\w]+)*)
|
FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
|
||||||
(?:\+(?P<add>-?\d+(?:\.\d+)?))?
|
INTERNAL_FORMAT_RE = re.compile(r'''(?x)
|
||||||
(?:>(?P<strf_format>.+?))?
|
(?P<negate>-)?
|
||||||
)''')
|
(?P<fields>{0})
|
||||||
for mobj in re.finditer(INTERNAL_FORMAT_RE, outtmpl):
|
(?P<maths>(?:[-+]-?(?:\d+(?:\.\d+)?|{0}))*)
|
||||||
mobj = mobj.groupdict()
|
(?:>(?P<strf_format>.+?))?
|
||||||
# Object traversal
|
(?:\|(?P<default>.*?))?
|
||||||
fields = mobj['fields'].split('.')
|
$'''.format(FIELD_RE))
|
||||||
final_key = mobj['final_key']
|
MATH_OPERATORS_RE = re.compile(r'(?<![-+])([-+])')
|
||||||
value = traverse_dict(template_dict, fields)
|
MATH_FUNCTIONS = {
|
||||||
# Offset the value
|
'+': float.__add__,
|
||||||
if mobj['add']:
|
'-': float.__sub__,
|
||||||
value = float_or_none(value)
|
}
|
||||||
if value is not None:
|
for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
|
||||||
value = value + float(mobj['add'])
|
final_key = outer_mobj.group('key')
|
||||||
# Datetime formatting
|
str_type = outer_mobj.group('type')
|
||||||
if mobj['strf_format']:
|
value = None
|
||||||
value = strftime_or_none(value, mobj['strf_format'])
|
mobj = re.match(INTERNAL_FORMAT_RE, final_key)
|
||||||
if mobj['type'] in 'crs' and value is not None: # string
|
if mobj is not None:
|
||||||
value = sanitize('%{}'.format(mobj['type']) % fields[-1], value)
|
mobj = mobj.groupdict()
|
||||||
|
# Object traversal
|
||||||
|
fields = mobj['fields'].split('.')
|
||||||
|
value = traverse_dict(template_dict, fields)
|
||||||
|
# Negative
|
||||||
|
if mobj['negate']:
|
||||||
|
value = float_or_none(value)
|
||||||
|
if value is not None:
|
||||||
|
value *= -1
|
||||||
|
# Do maths
|
||||||
|
if mobj['maths']:
|
||||||
|
value = float_or_none(value)
|
||||||
|
operator = None
|
||||||
|
for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
|
||||||
|
if item == '':
|
||||||
|
value = None
|
||||||
|
if value is None:
|
||||||
|
break
|
||||||
|
if operator:
|
||||||
|
item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
|
||||||
|
offset = float_or_none(item)
|
||||||
|
if offset is None:
|
||||||
|
offset = float_or_none(traverse_dict(template_dict, item.split('.')))
|
||||||
|
try:
|
||||||
|
value = operator(value, multiplier * offset)
|
||||||
|
except (TypeError, ZeroDivisionError):
|
||||||
|
value = None
|
||||||
|
operator = None
|
||||||
|
else:
|
||||||
|
operator = MATH_FUNCTIONS[item]
|
||||||
|
# Datetime formatting
|
||||||
|
if mobj['strf_format']:
|
||||||
|
value = strftime_or_none(value, mobj['strf_format'])
|
||||||
|
# Set default
|
||||||
|
if value is None and mobj['default'] is not None:
|
||||||
|
value = mobj['default']
|
||||||
|
# Sanitize
|
||||||
|
if str_type in 'crs' and value is not None: # string
|
||||||
|
value = sanitize('%{}'.format(str_type) % fields[-1], value)
|
||||||
else: # numeric
|
else: # numeric
|
||||||
numeric_fields.append(final_key)
|
numeric_fields.append(final_key)
|
||||||
value = float_or_none(value)
|
value = float_or_none(value)
|
||||||
|
|
|
@ -24,7 +24,7 @@ class ExecAfterDownloadPP(PostProcessor):
|
||||||
|
|
||||||
def parse_cmd(self, cmd, info):
|
def parse_cmd(self, cmd, info):
|
||||||
# If no %(key)s is found, replace {} for backward compatibility
|
# If no %(key)s is found, replace {} for backward compatibility
|
||||||
if not re.search(FORMAT_RE.format(r'[-\w>.+]+'), cmd):
|
if not re.search(FORMAT_RE.format(r'[^)]*'), cmd):
|
||||||
if '{}' not in cmd:
|
if '{}' not in cmd:
|
||||||
cmd += ' {}'
|
cmd += ' {}'
|
||||||
return cmd.replace('{}', compat_shlex_quote(info['filepath']))
|
return cmd.replace('{}', compat_shlex_quote(info['filepath']))
|
||||||
|
|
|
@ -6112,11 +6112,11 @@ def traverse_dict(dictn, keys, casesense=True):
|
||||||
key = key.lower()
|
key = key.lower()
|
||||||
dictn = dictn.get(key)
|
dictn = dictn.get(key)
|
||||||
elif isinstance(dictn, (list, tuple, compat_str)):
|
elif isinstance(dictn, (list, tuple, compat_str)):
|
||||||
key, n = int_or_none(key), len(dictn)
|
if ':' in key:
|
||||||
if key is not None and -n <= key < n:
|
key = slice(*map(int_or_none, key.split(':')))
|
||||||
dictn = dictn[key]
|
|
||||||
else:
|
else:
|
||||||
dictn = None
|
key = int_or_none(key)
|
||||||
|
dictn = try_get(dictn, lambda x: x[key])
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
return dictn
|
return dictn
|
||||||
|
|
Loading…
Reference in a new issue