Add config for search backend

This commit is contained in:
JDC 2020-11-19 08:06:13 -05:00 committed by Nick Sweeting
parent 5f6673c72c
commit c2c01af3ad
3 changed files with 23 additions and 13 deletions

View file

@ -139,6 +139,18 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'GIT_ARGS': {'type': list, 'default': ['--recursive']},
},
'SEARCH_BACKEND_CONFIG' : {
'USE_INDEXING_BACKEND': {'type': bool, 'default': True},
'USE_SEARCHING_BACKEND': {'type': bool, 'default': True},
'SEARCH_BACKEND_ENGINE': {'type': str, 'default': 'sonic'},
'SEARCH_BACKEND_HOST_NAME': {'type': str, 'default': 'localhost'},
'SEARCH_BACKEND_PORT': {'type': int, 'default': 1491},
'SEARCH_BACKEND_PASSWORD': {'type': str, 'default': 'SecretPassword'},
# SONIC
'SONIC_BUCKET': {'type': str, 'default': 'archivebox'},
'SONIC_COLLECTION': {'type': str, 'default': 'snapshots'},
},
'DEPENDENCY_CONFIG': {
'USE_CURL': {'type': bool, 'default': True},
'USE_WGET': {'type': bool, 'default': True},
@ -149,7 +161,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
'USE_CHROME': {'type': bool, 'default': True},
'USE_NODE': {'type': bool, 'default': True},
'USE_YOUTUBEDL': {'type': bool, 'default': True},
'CURL_BINARY': {'type': str, 'default': 'curl'},
'GIT_BINARY': {'type': str, 'default': 'git'},
'WGET_BINARY': {'type': str, 'default': 'wget'},

View file

@ -5,19 +5,16 @@ from importlib import import_module
from archivebox.index.schema import Link
from archivebox.util import enforce_types
from archivebox.config import setup_django, OUTPUT_DIR
from archivebox.config import setup_django, OUTPUT_DIR, USE_INDEXING_BACKEND, USE_SEARCHING_BACKEND, SEARCH_BACKEND_ENGINE
def indexing_enabled() -> bool:
    """Return whether full-text indexing should be performed.

    The answer comes from the ``USE_INDEXING_BACKEND`` config value rather
    than a hard-coded ``True``, so indexing can be switched off via config.
    """
    return USE_INDEXING_BACKEND
def search_backend_enabled() -> bool:
    """Return whether searches should be routed to the search backend.

    The answer comes from the ``USE_SEARCHING_BACKEND`` config value rather
    than a hard-coded ``True``, so the backend can be disabled via config.
    """
    return USE_SEARCHING_BACKEND
def get_backend() -> str:
    """Return the dotted module path of the configured search backend.

    The path is built from the ``SEARCH_BACKEND_ENGINE`` config value
    (e.g. ``'sonic'`` yields ``'search.backends.sonic'``) instead of being
    pinned to the sonic backend.
    """
    return f'search.backends.{SEARCH_BACKEND_ENGINE}'
def import_backend():
backend_string = get_backend()

View file

@ -3,17 +3,18 @@ from typing import List
from sonic import IngestClient, SearchClient
from archivebox.util import enforce_types
from archivebox.config import SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD, SONIC_BUCKET, SONIC_COLLECTION
@enforce_types
def index(snapshot_id: str, texts: List[str]):
    """Push every text in *texts* to Sonic under *snapshot_id*.

    A single ingest connection is opened using the configured host, port
    and password, and each text is pushed once into the configured
    bucket/collection. Each item is coerced with ``str()`` before pushing.
    """
    with IngestClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as ingestcl:
        for text in texts:
            ingestcl.push(SONIC_BUCKET, SONIC_COLLECTION, snapshot_id, str(text))
@enforce_types
def search(text: str) -> List:
    """Query Sonic for *text* and return whatever the query yields.

    A single search connection is opened using the configured host, port
    and password, and the configured bucket/collection is queried once.
    The result is presumably a list of snapshot ids (the ids passed to
    ``index``) -- confirm against the sonic client.
    """
    with SearchClient(SEARCH_BACKEND_HOST_NAME, SEARCH_BACKEND_PORT, SEARCH_BACKEND_PASSWORD) as querycl:
        snap_ids = querycl.query(SONIC_BUCKET, SONIC_COLLECTION, text)
    return snap_ids