Add all format filtering operators also to --match-filter

PR: https://github.com/ytdl-org/youtube-dl/pull/27361

Authored by: max-te
This commit is contained in:
Max Teegen 2021-06-13 16:25:19 +02:00 committed by pukkandan
parent 678da2f21b
commit 77b87f0519
4 changed files with 49 additions and 42 deletions

View file

@ -338,25 +338,21 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t
COUNT views
--max-views COUNT Do not download any videos with more than
COUNT views
--match-filter FILTER Generic video filter. Specify any key (see
"OUTPUT TEMPLATE" for a list of available
keys) to match if the key is present, !key
to check if the key is not present,
key>NUMBER (like "view_count > 12", also
works with >=, <, <=, !=, =) to compare
against a number, key = 'LITERAL' (like
"uploader = 'Mike Smith'", also works with
!=) to match against a string literal and &
to require multiple matches. Values which
are not known are excluded unless you put a
question mark (?) after the operator. For
example, to only match videos that have
been liked more than 100 times and disliked
less than 50 times (or the dislike
functionality is not available at the given
service), but who also have a description,
use --match-filter "like_count > 100 &
dislike_count <? 50 & description"
--match-filter FILTER Generic video filter. Any field (see
"OUTPUT TEMPLATE") can be compared with a
number or a quoted string using the
operators defined in "Filtering formats".
You can also simply specify a field to
match if the field is present and "!field"
to check if the field is not present.
Multiple filters can be checked using "&".
For example, to only match videos that are
not live, has a like count more than 100, a
dislike count less than 50 (or the dislike
field is not available), and also has a
description that contains "python", use
--match-filter "!is_live & like_count>100 &
dislike_count<?50 & description*='python'"
--no-match-filter Do not use generic video filter (default)
--no-playlist Download only the video, if the URL refers
to a video and a playlist

View file

@ -1207,7 +1207,6 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
'9999 51')
def test_match_str(self):
self.assertRaises(ValueError, match_str, 'xy>foobar', {})
self.assertFalse(match_str('xy', {'x': 1200}))
self.assertTrue(match_str('!xy', {'x': 1200}))
self.assertTrue(match_str('x', {'x': 1200}))
@ -1224,6 +1223,17 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4')
self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'}))
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'}))
self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'}))
self.assertTrue(match_str('y^=foo', {'y': 'foobar42'}))
self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'}))
self.assertFalse(match_str('y^=bar', {'y': 'foobar42'}))
self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'}))
self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42})
self.assertTrue(match_str('y*=bar', {'y': 'foobar42'}))
self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'}))
self.assertFalse(match_str('y*=baz', {'y': 'foobar42'}))
self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'}))
self.assertTrue(match_str('y$=42', {'y': 'foobar42'}))
self.assertFalse(match_str('y$=43', {'y': 'foobar42'}))
self.assertFalse(match_str(
'like_count > 100 & dislike_count <? 50 & description',
{'like_count': 90, 'description': 'foo'}))

View file

@ -375,22 +375,16 @@ def parseOpts(overrideArguments=None):
'--match-filter',
metavar='FILTER', dest='match_filter', default=None,
help=(
'Generic video filter. '
'Specify any key (see "OUTPUT TEMPLATE" for a list of available keys) to '
'match if the key is present, '
'!key to check if the key is not present, '
'key>NUMBER (like "view_count > 12", also works with '
'>=, <, <=, !=, =) to compare against a number, '
'key = \'LITERAL\' (like "uploader = \'Mike Smith\'", also works with !=) '
'to match against a string literal '
'and & to require multiple matches. '
'Values which are not known are excluded unless you '
'put a question mark (?) after the operator. '
'For example, to only match videos that have been liked more than '
'100 times and disliked less than 50 times (or the dislike '
'functionality is not available at the given service), but who '
'also have a description, use --match-filter '
'"like_count > 100 & dislike_count <? 50 & description"'))
'Generic video filter. Any field (see "OUTPUT TEMPLATE") can be compared with a '
'number or a string using the operators defined in "Filtering formats". '
'You can also simply specify a field to match if the field is present '
'and "!field" to check if the field is not present. '
'Multiple filters can be checked using "&". '
'For example, to only match videos that are not live, '
'has a like count more than 100, a dislike count less than 50 '
'(or the dislike field is not available), and also has a description '
'that contains "python", use --match-filter "!is_live & '
'like_count>100 & dislike_count<?50 & description*=\'python\'"'))
selection.add_option(
'--no-match-filter',
metavar='FILTER', dest='match_filter', action='store_const', const=None,

View file

@ -4663,17 +4663,20 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False):
def _match_one(filter_part, dct):
# TODO: Generalize code with YoutubeDL._build_format_filter
COMPARISON_OPERATORS = {
'<': operator.lt,
'<=': operator.le,
'>': operator.gt,
'>=': operator.ge,
'=': operator.eq,
'!=': operator.ne,
'*=': operator.contains,
'^=': lambda attr, value: attr.startswith(value),
'$=': lambda attr, value: attr.endswith(value),
}
operator_rex = re.compile(r'''(?x)\s*
(?P<key>[a-z_]+)
\s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
(?:
(?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
(?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
@ -4683,7 +4686,11 @@ def _match_one(filter_part, dct):
''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
m = operator_rex.search(filter_part)
if m:
op = COMPARISON_OPERATORS[m.group('op')]
unnegated_op = COMPARISON_OPERATORS[m.group('op')]
if m.group('negation'):
op = lambda attr, value: not unnegated_op(attr, value)
else:
op = unnegated_op
actual_value = dct.get(m.group('key'))
if (m.group('quotedstrval') is not None
or m.group('strval') is not None
@ -4693,14 +4700,14 @@ def _match_one(filter_part, dct):
# https://github.com/ytdl-org/youtube-dl/issues/11082).
or actual_value is not None and m.group('intval') is not None
and isinstance(actual_value, compat_str)):
if m.group('op') not in ('=', '!='):
raise ValueError(
'Operator %s does not support string values!' % m.group('op'))
comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
quote = m.group('quote')
if quote is not None:
comparison_value = comparison_value.replace(r'\%s' % quote, quote)
else:
if m.group('op') in ('*=', '^=', '$='):
raise ValueError(
'Operator %s only supports string values!' % m.group('op'))
try:
comparison_value = int(m.group('intval'))
except ValueError: