Use QuerySets for search backend API instead of pks

This commit is contained in:
JDC 2020-11-19 18:19:33 -05:00 committed by Nick Sweeting
parent f383648ffc
commit 823df34080
3 changed files with 16 additions and 9 deletions

View file

@ -14,12 +14,10 @@ class SearchResultsAdminMixin(object):
if not search_term: if not search_term:
return qs, use_distinct return qs, use_distinct
try: try:
snapshot_ids = query_search_index(search_term) qsearch = query_search_index(search_term)
except Exception as err: except Exception as err:
messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}') messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
else: else:
qsearch = queryset.filter(pk__in=snapshot_ids)
qs |= qsearch qs |= qsearch
finally: finally:
return qs, use_distinct return qs, use_distinct

View file

@ -666,7 +666,7 @@ def remove(filter_str: Optional[str]=None,
to_remove = snapshots.count() to_remove = snapshots.count()
remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir) remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
flush_search_index(snapshot_ids=(str(pk) for pk in snapshots.values_list('pk',flat=True))) flush_search_index(snapshots=snapshots)
all_snapshots = load_main_index(out_dir=out_dir) all_snapshots = load_main_index(out_dir=out_dir)
log_removal_finished(all_snapshots.count(), to_remove) log_removal_finished(all_snapshots.count(), to_remove)

View file

@ -1,7 +1,8 @@
from typing import List, Union, Generator from typing import List, Union
from pathlib import Path from pathlib import Path
from importlib import import_module from importlib import import_module
from django.db.models import QuerySet
from archivebox.index.schema import Link from archivebox.index.schema import Link
from archivebox.util import enforce_types from archivebox.util import enforce_types
@ -39,16 +40,24 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir:
backend.index(snapshot_id=str(snap.id), texts=texts) backend.index(snapshot_id=str(snap.id), texts=texts)
@enforce_types
def query_search_index(query: str, out_dir: Path=OUTPUT_DIR) -> QuerySet:
    """Return the Snapshots that the search backend matches for *query*.

    Args:
        query: the search string handed to the backend's ``search()``.
        out_dir: passed to ``setup_django`` before the Snapshot model is imported.

    Returns:
        A ``Snapshot`` QuerySet filtered to the primary keys returned by the
        backend, or an empty QuerySet when the search backend is disabled.
        The result is always a QuerySet so callers can combine it with other
        querysets (e.g. ``qs |= query_search_index(...)``).
    """
    # Django has to be configured before the model can be imported; the import
    # is kept local to the function for that reason.
    setup_django(out_dir, check_db=True)
    from core.models import Snapshot

    if not search_backend_enabled():
        # Previously a bare list was returned here, which contradicts the
        # annotated return type and cannot be OR-ed with a QuerySet.
        return Snapshot.objects.none()

    backend = import_backend()
    snapshot_ids = backend.search(query)
    # TODO preserve ordering from backend
    return Snapshot.objects.filter(pk__in=snapshot_ids)
@enforce_types
def flush_search_index(snapshots: QuerySet):
    """Ask the search backend to drop the index entries for *snapshots*.

    Does nothing when indexing is disabled or the queryset matches no rows.

    NOTE: the primary keys are read from the database lazily, inside this
    function, so the rows must still exist when it is called (i.e. call it
    before deleting the snapshots, not after).

    Args:
        snapshots: queryset whose ``pk`` values are stringified and passed to
            the backend's ``flush()``.
    """
    # .exists() runs a cheap existence query; bool(queryset) would fetch and
    # cache every matching row just to test for emptiness.
    if not indexing_enabled() or not snapshots.exists():
        return

    backend = import_backend()
    snapshot_ids = (str(pk) for pk in snapshots.values_list('pk', flat=True))
    backend.flush(snapshot_ids)