Implement flush for search backend after remove command

This commit is contained in:
JDC 2020-11-19 16:45:12 -05:00 committed by Nick Sweeting
parent c2c01af3ad
commit 47daa038eb
5 changed files with 21 additions and 7 deletions

View file

@ -147,8 +147,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
# SONIC
'SONIC_BUCKET': {'type': str, 'default': 'archivebox'},
'SONIC_COLLECTION': {'type': str, 'default': 'snapshots'},
'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
},
'DEPENDENCY_CONFIG': {

View file

@ -18,7 +18,7 @@ class SearchResultsAdminMixin(object):
except Exception as err:
messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
else:
qsearch = queryset.filter(id__in=snapshot_ids)
qsearch = queryset.filter(pk__in=snapshot_ids)
qs |= qsearch
finally:

View file

@ -115,6 +115,7 @@ from .logging_util import (
printable_dependency_version,
)
from .search import flush_search_index
ALLOWED_IN_OUTPUT_DIR = {
'lost+found',
@ -665,6 +666,7 @@ def remove(filter_str: Optional[str]=None,
to_remove = snapshots.count()
remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
flush_search_index(snapshot_ids=[str(pk) for pk in snapshots.values_list('pk',flat=True)])
all_snapshots = load_main_index(out_dir=out_dir)
log_removal_finished(all_snapshots.count(), to_remove)

View file

@ -45,4 +45,11 @@ def query_search_index(text: str) -> List:
return backend.search(text)
else:
return []
@enforce_types
def flush_search_index(snapshot_ids: List[str]):
    """Ask the search backend to flush the given snapshot ids.

    Does nothing when indexing is disabled or when ``snapshot_ids`` is
    empty; otherwise the backend returned by ``import_backend()`` receives
    the whole list through its ``flush`` function.
    """
    # indexing_enabled() is still evaluated first, so the short-circuit
    # order matches the guard-clause form.
    if indexing_enabled() and snapshot_ids:
        import_backend().flush(snapshot_ids)

View file

@ -10,11 +10,16 @@ from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEA
# Push every entry of `texts` to the search backend under `snapshot_id`,
# through an IngestClient opened with the configured host, port and password.
def index(snapshot_id: str, texts: List[str]):
with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
for text in texts:
# NOTE(review): the two push calls below differ only in the order of the
# SONIC_BUCKET / SONIC_COLLECTION arguments. In this diff view they look
# like the removed/added pair of one changed line, not two real calls -
# confirm against the actual file before relying on either order.
ingestcl.push(SONIC_BUCKET, SONIC_COLLECTION, snapshot_id, str(text))
ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(text))
# Query the search backend for `text` through a SearchClient opened with the
# configured host, port and password, and return whatever the query yields.
@enforce_types
def search(text: str) -> List:
with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
# NOTE(review): the two assignments below differ only in the order of the
# SONIC_BUCKET / SONIC_COLLECTION arguments. They look like the
# removed/added pair of one changed diff line; as written the second
# result overwrites the first - confirm against the actual file.
snap_ids = querycl.query(SONIC_BUCKET, SONIC_COLLECTION, text)
snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
return snap_ids
@enforce_types
def flush(snapshot_ids: List[str]):
    """Flush each of the given snapshot ids from the search backend.

    Opens a single IngestClient with the configured host, port and
    password, then issues one ``flush_object`` call per id against
    ``SONIC_COLLECTION`` / ``SONIC_BUCKET``.
    """
    with IngestClient(
        SEARCH_BACKEND_HOST_NAME,
        SEARCH_BACKEND_PORT,
        SEARCH_BACKEND_PASSWORD,
    ) as ingest_client:
        for snapshot_id in snapshot_ids:
            ingest_client.flush_object(
                SONIC_COLLECTION,
                SONIC_BUCKET,
                str(snapshot_id),
            )