mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
feat: add search filter-type to list command
This commit is contained in:
parent
fb67d6684c
commit
0f7dba07df
2 changed files with 34 additions and 2 deletions
|
@ -98,7 +98,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
parser.add_argument(
|
||||
'--filter-type',
|
||||
type=str,
|
||||
choices=('exact', 'substring', 'domain', 'regex','tag'),
|
||||
choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
|
||||
default='exact',
|
||||
help='Type of pattern matching to use when filtering URLs',
|
||||
)
|
||||
|
|
|
@ -51,6 +51,8 @@ from .sql import (
|
|||
write_sql_link_details,
|
||||
)
|
||||
|
||||
from ..search import search_backend_enabled, query_search_index
|
||||
|
||||
### Link filtering and checking
|
||||
|
||||
@enforce_types
|
||||
|
@ -365,7 +367,7 @@ LINK_FILTERS = {
|
|||
}
|
||||
|
||||
@enforce_types
|
||||
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
|
||||
def q_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
|
||||
q_filter = Q()
|
||||
for pattern in filter_patterns:
|
||||
try:
|
||||
|
@ -380,6 +382,36 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
|
|||
raise SystemExit(2)
|
||||
return snapshots.filter(q_filter)
|
||||
|
||||
def search_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='search') -> QuerySet:
    """Narrow ``snapshots`` down to the entries matched by the search backend.

    Every pattern in ``filter_patterns`` is passed to ``query_search_index``
    and the per-pattern results are OR-ed together, starting from the value
    returned by ``get_empty_snapshot_queryset()``.  The combined result is
    then AND-ed with ``snapshots`` and returned.

    ``filter_type`` is accepted but not read inside this function.

    Raises:
        SystemExit(2): when ``search_backend_enabled()`` is falsy, or when
            querying/merging the results for any pattern raises an Exception.
            An error message is written via ``stderr`` before exiting.
    """
    # Guard clause: bail out early if the search backend cannot be used.
    if not search_backend_enabled():
        stderr()
        stderr('[X] The search backend is not enabled', color='red')
        raise SystemExit(2)

    matched = get_empty_snapshot_queryset()
    for query in filter_patterns:
        try:
            # Both the index lookup and the union stay inside the try so that
            # a failure in either one is reported the same way.
            matched |= query_search_index(query)
        except Exception as err:
            stderr()
            stderr(f'[X] The search backend threw an exception={err}:', color='red')
            raise SystemExit(2)

    return snapshots & matched
|
||||
|
||||
@enforce_types
def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type: str='exact') -> QuerySet:
    """Filter ``snapshots`` by ``filter_patterns`` using the requested strategy.

    When ``filter_type`` is ``'search'`` the work is delegated to
    ``search_filter``; every other value is delegated to ``q_filter``.
    All three arguments are forwarded unchanged to the chosen function and
    its return value is returned as-is.
    """
    # Pick the implementation once, then make a single forwarding call.
    apply_filter = search_filter if filter_type == 'search' else q_filter
    return apply_filter(snapshots, filter_patterns, filter_type)
|
||||
|
||||
|
||||
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||
"""indexed links without checking archive status or data directory validity"""
|
||||
|
|
Loading…
Reference in a new issue