From 9aac09a5e113371c70eaa6158df8a5c4acecd2c3 Mon Sep 17 00:00:00 2001 From: Cristian Date: Tue, 8 Dec 2020 18:05:37 -0500 Subject: [PATCH] feat: Patch setup_django so we can use an inmemory db in specific commands --- archivebox/cli/__init__.py | 4 +++- archivebox/config.py | 12 ++++++++++-- archivebox/core/settings.py | 4 +++- archivebox/index/schema.py | 7 +------ archivebox/main.py | 2 +- archivebox/search/__init__.py | 2 -- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/archivebox/cli/__init__.py b/archivebox/cli/__init__.py index dfc607e1..9cf6d0ac 100644 --- a/archivebox/cli/__init__.py +++ b/archivebox/cli/__init__.py @@ -19,6 +19,8 @@ meta_cmds = ('help', 'version') main_cmds = ('init', 'info', 'config') archive_cmds = ('add', 'remove', 'update', 'list', 'status') +fake_db = ("oneshot",) + meta_cmds + display_first = (*meta_cmds, *main_cmds, *archive_cmds) # every imported command module must have these properties in order to be valid @@ -59,7 +61,7 @@ def run_subcommand(subcommand: str, pwd: Union[Path, str, None]=None) -> None: """Run a given ArchiveBox subcommand with the given list of args""" from ..config import setup_django - setup_django() + setup_django(in_memory_db=subcommand in fake_db) module = import_module('.archivebox_{}'.format(subcommand), __package__) module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore diff --git a/archivebox/config.py b/archivebox/config.py index 846df0c9..0e07a34e 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -991,7 +991,7 @@ def check_data_folder(out_dir: Union[str, Path, None]=None, config: ConfigDict=C -def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG) -> None: +def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG, in_memory_db=False) -> None: check_system_config() output_dir = out_dir or Path(config['OUTPUT_DIR']) @@ -1004,7 +1004,15 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG) os.environ.setdefault('OUTPUT_DIR', str(output_dir)) assert (config['PACKAGE_DIR'] / 'core' / 'settings.py').exists(), 'settings.py was not found at archivebox/core/settings.py' os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings') - django.setup() + + if in_memory_db: + # Put the db in memory and run migrations in case any command requires it + from django.core.management import call_command + os.environ.setdefault("ARCHIVEBOX_DATABASE_NAME", ":memory:") + django.setup() + call_command("migrate", interactive=False, stdout=False) + else: + django.setup() if check_db: sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 28a3e1fe..43a1e153 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -100,10 +100,12 @@ TEMPLATES = [ ################################################################################ DATABASE_FILE = Path(OUTPUT_DIR) / SQL_INDEX_FILENAME +DATABASE_NAME = os.environ.get("ARCHIVEBOX_DATABASE_NAME", DATABASE_FILE) + DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', - 'NAME': str(DATABASE_FILE), + 'NAME': DATABASE_NAME, } } diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index 6180d3b0..c6bf3731 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -353,12 +353,7 @@ class Link: ### Archive Status Helpers @property def num_outputs(self) -> int: - try: - return self.as_snapshot().num_outputs - except OperationalError: - return sum(1 for method in self.history.keys() - for result in self.history[method] - if result.status == 'succeeded') + return self.as_snapshot().num_outputs @property def num_failures(self) -> int: diff --git a/archivebox/main.py b/archivebox/main.py index bb24d124..49c31eed 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -524,7 +524,7 @@ def oneshot(url: str, out_dir: Path=OUTPUT_DIR): ) raise SystemExit(2) methods = ignore_methods(['title']) - archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, skip_index=True) + archive_link(oneshot_link[0], out_dir=out_dir, methods=methods, skip_index=False) return oneshot_link @enforce_types diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index 9efe838b..a1f67ef7 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -33,7 +33,6 @@ def write_search_index(link: Link, texts: Union[List[str], None]=None, out_dir: return if not skip_text_index and texts: - setup_django(out_dir, check_db=True) from core.models import Snapshot snap = Snapshot.objects.filter(url=link.url).first() @@ -91,7 +90,6 @@ def index_links(links: Union[List[Link],None], out_dir: Path=OUTPUT_DIR): if not links: return - setup_django(out_dir=out_dir, check_db=True) from core.models import Snapshot, ArchiveResult for link in links: