Mirror of https://github.com/ArchiveBox/ArchiveBox (synced 2024-11-10 06:34:16 +00:00)
Implement flush for search backend after remove command
This commit is contained in:
parent c2c01af3ad
commit 47daa038eb

5 changed files with 21 additions and 7 deletions
@@ -147,8 +147,8 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
         'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
         'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
         # SONIC
-        'SONIC_BUCKET': {'type': str, 'default': 'archivebox'},
-        'SONIC_COLLECTION': {'type': str, 'default': 'snapshots'},
+        'SONIC_COLLECTION': {'type': str, 'default': 'archivebox'},
+        'SONIC_BUCKET': {'type': str, 'default': 'snapshots'},
     },

     'DEPENDENCY_CONFIG': {
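These defaults live in ArchiveBox's typed config table (presumably archivebox/config.py): each key declares a Python type and a default, and the commit swaps the SONIC_COLLECTION/SONIC_BUCKET defaults to match the corrected argument order in the Sonic backend further down. As a rough, hypothetical sketch of how such a table of typed defaults can be resolved against environment variables (SEARCH_BACKEND_DEFAULTS and resolve_config are illustrative names, not ArchiveBox's actual loader):

    import os
    from typing import Any, Dict

    # Illustrative stand-in for a typed config table like the one above -- not ArchiveBox's loader.
    SEARCH_BACKEND_DEFAULTS: Dict[str, Dict[str, Any]] = {
        'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
        'SONIC_COLLECTION':    {'type': str, 'default': 'archivebox'},
        'SONIC_BUCKET':        {'type': str, 'default': 'snapshots'},
    }

    def resolve_config(defaults: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
        # Environment variables of the same name override the default;
        # values are coerced to the declared type (e.g. SEARCH_BACKEND_PORT -> int).
        return {
            key: spec['type'](os.environ.get(key, spec['default']))
            for key, spec in defaults.items()
        }

    print(resolve_config(SEARCH_BACKEND_DEFAULTS))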
@@ -18,7 +18,7 @@ class SearchResultsAdminMixin(object):
         except Exception as err:
             messages.add_message(request, messages.WARNING, f'Error from the search backend, only showing results from default admin search fields - Error: {err}')
         else:
-            qsearch = queryset.filter(id__in=snapshot_ids)
+            qsearch = queryset.filter(pk__in=snapshot_ids)
             qs |= qsearch

         finally:
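The id__in to pk__in change matters because Django's pk lookup is an alias for whichever field is declared as the model's primary key, so the admin search keeps working even when the primary key is not literally named id. A minimal, self-contained sketch with a hypothetical stand-in model (not ArchiveBox's Snapshot):

    import uuid

    import django
    from django.conf import settings
    from django.core.exceptions import FieldError

    # Single-file Django setup so a model can be declared below; purely illustrative.
    settings.configure(
        INSTALLED_APPS=['__main__'],
        DATABASES={'default': {'ENGINE': 'django.db.backends.sqlite3', 'NAME': ':memory:'}},
    )
    django.setup()

    from django.db import models

    class Snapshot(models.Model):   # hypothetical stand-in, not ArchiveBox's Snapshot model
        snapshot_uuid = models.UUIDField(primary_key=True, default=uuid.uuid4)

    try:
        Snapshot.objects.filter(id__in=[uuid.uuid4()])      # no field named 'id' on this model
    except FieldError as err:
        print('id__in fails:', err)

    Snapshot.objects.filter(pk__in=[uuid.uuid4()])          # 'pk' resolves to snapshot_uuid
    print('pk__in resolves fine')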
@@ -115,6 +115,7 @@ from .logging_util import (
     printable_dependency_version,
 )

+from .search import flush_search_index

 ALLOWED_IN_OUTPUT_DIR = {
     'lost+found',
@@ -665,6 +666,7 @@ def remove(filter_str: Optional[str]=None,
     to_remove = snapshots.count()

     remove_from_sql_main_index(snapshots=snapshots, out_dir=out_dir)
+    flush_search_index(snapshot_ids=[str(pk) for pk in snapshots.values_list('pk',flat=True)])
     all_snapshots = load_main_index(out_dir=out_dir)
     log_removal_finished(all_snapshots.count(), to_remove)

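The inserted call stringifies the primary keys of the snapshots being removed (values_list('pk', flat=True) yields bare pk values rather than one-item tuples) so the search backend, which works with string ids, can drop the matching documents. A tiny pure-Python sketch of just that id-collection step (no Django here; the pk values are made up):

    from typing import Iterable, List

    def collect_snapshot_ids(pks: Iterable) -> List[str]:
        # Mirrors [str(pk) for pk in snapshots.values_list('pk', flat=True)] above:
        # materialize the ids as strings before the rows disappear from the SQL index.
        return [str(pk) for pk in pks]

    print(collect_snapshot_ids([101, 102, 103]))   # ['101', '102', '103'] -- hypothetical pks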
@@ -45,4 +45,11 @@ def query_search_index(text: str) -> List:
         return backend.search(text)
     else:
         return []

+
+@enforce_types
+def flush_search_index(snapshot_ids: List[str]):
+    if not indexing_enabled() or not snapshot_ids:
+        return
+    backend = import_backend()
+    backend.flush(snapshot_ids)
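flush_search_index follows the same shape as the existing query_search_index: bail out early when indexing is disabled or there is nothing to flush, then delegate to whichever backend module is configured. A self-contained sketch of that call pattern with stand-in helpers (FakeBackend, the stubbed indexing_enabled/import_backend, and the ids are hypothetical, not ArchiveBox code):

    from typing import List

    def indexing_enabled() -> bool:
        return True                      # stand-in: the real check reads the search config

    class FakeBackend:
        def flush(self, snapshot_ids: List[str]) -> None:
            print(f'flushing {len(snapshot_ids)} snapshot(s) from the search index')

    def import_backend() -> FakeBackend:
        return FakeBackend()             # stand-in: the real helper imports the configured backend module

    def flush_search_index(snapshot_ids: List[str]) -> None:
        # Guard clauses first: nothing to flush, or search indexing is turned off entirely.
        if not indexing_enabled() or not snapshot_ids:
            return
        import_backend().flush(snapshot_ids)

    flush_search_index(['1a2b3c', '4d5e6f'])   # hypothetical snapshot ids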
@@ -10,11 +10,16 @@ from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEA…
 def index(snapshot_id: str, texts: List[str]):
     with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
         for text in texts:
-            ingestcl.push(SONIC_BUCKET, SONIC_COLLECTION, snapshot_id, str(text))
+            ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, snapshot_id, str(text))

 @enforce_types
 def search(text: str) -> List:
     with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
-        snap_ids = querycl.query(SONIC_BUCKET, SONIC_COLLECTION, text)
+        snap_ids = querycl.query(SONIC_COLLECTION, SONIC_BUCKET, text)
     return snap_ids

+@enforce_types
+def flush(snapshot_ids: List[str]):
+    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
+        for id in snapshot_ids:
+            ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, str(id))
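The substantive fix in this file is the argument order: the sonic-client calls used here take the collection first and the bucket second, so the old push/query calls were passing the two names in reversed positions. Swapping the config defaults in the first hunk keeps the literal values sent to Sonic ('archivebox' as the collection, 'snapshots' as the bucket) unchanged, so existing indexed data stays addressable. A rough usage sketch against a locally running Sonic server, reusing only the calls that appear in the diff (the host, the example ids, and the text are assumptions; the port and password match the config defaults above):

    from sonic import IngestClient, SearchClient

    SONIC_HOST, SONIC_PORT, SONIC_PASSWORD = 'localhost', 1491, 'SecretPassword'
    SONIC_COLLECTION, SONIC_BUCKET = 'archivebox', 'snapshots'

    # Index one document, search for it, then flush it -- (collection, bucket, object) order throughout.
    with IngestClient(SONIC_HOST, SONIC_PORT, SONIC_PASSWORD) as ingestcl:
        ingestcl.push(SONIC_COLLECTION, SONIC_BUCKET, 'snapshot-uuid-1', 'example page text')

    with SearchClient(SONIC_HOST, SONIC_PORT, SONIC_PASSWORD) as querycl:
        print(querycl.query(SONIC_COLLECTION, SONIC_BUCKET, 'example'))

    with IngestClient(SONIC_HOST, SONIC_PORT, SONIC_PASSWORD) as ingestcl:
        ingestcl.flush_object(SONIC_COLLECTION, SONIC_BUCKET, 'snapshot-uuid-1')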