From 60f0458c774574dc89c78b1ce4ebdf32fd39f13f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 24 Oct 2024 15:40:24 -0700 Subject: [PATCH 01/25] rename configfile to collection --- archivebox/abx/archivebox/base_binary.py | 2 +- archivebox/abx/archivebox/base_configset.py | 22 +++++++++--------- archivebox/abx/archivebox/base_extractor.py | 25 +++++---------------- archivebox/abx/archivebox/reads.py | 2 +- archivebox/abx/archivebox/writes.py | 2 +- archivebox/config/configfile.py | 2 +- archivebox/config/constants.py | 19 ++++++++++++++-- archivebox/main.py | 2 +- archivebox/misc/util.py | 2 ++ 9 files changed, 41 insertions(+), 37 deletions(-) diff --git a/archivebox/abx/archivebox/base_binary.py b/archivebox/abx/archivebox/base_binary.py index afa4f192..ee7ab5e1 100644 --- a/archivebox/abx/archivebox/base_binary.py +++ b/archivebox/abx/archivebox/base_binary.py @@ -14,7 +14,6 @@ from pydantic_pkgr import ( EnvProvider, ) -from archivebox.config import CONSTANTS from archivebox.config.permissions import ARCHIVEBOX_USER import abx @@ -34,6 +33,7 @@ class BaseBinProvider(BinProvider): return [self] class BaseBinary(Binary): + # TODO: formalize state diagram, final states, transitions, side effects, etc. @staticmethod def symlink_to_lib(binary, bin_dir=None) -> None: diff --git a/archivebox/abx/archivebox/base_configset.py b/archivebox/abx/archivebox/base_configset.py index 3a6695a1..706b9df8 100644 --- a/archivebox/abx/archivebox/base_configset.py +++ b/archivebox/abx/archivebox/base_configset.py @@ -99,7 +99,7 @@ class BaseConfigSet(BaseSettings): ) load_from_defaults: ClassVar[bool] = True - load_from_configfile: ClassVar[bool] = True + load_from_collection: ClassVar[bool] = True load_from_environment: ClassVar[bool] = True @classmethod @@ -128,7 +128,8 @@ class BaseConfigSet(BaseSettings): try: precedence_order = precedence_order or { 'defaults': init_settings, - 'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), + # 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), + 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), 'environment': env_settings, } except Exception as err: @@ -144,14 +145,15 @@ class BaseConfigSet(BaseSettings): precedence_order = { 'defaults': init_settings, - 'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), + # 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), + 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), 'environment': env_settings, } if not cls.load_from_environment: precedence_order.pop('environment') - if not cls.load_from_configfile: - precedence_order.pop('configfile') + if not cls.load_from_collection: + precedence_order.pop('collection') if not cls.load_from_defaults: precedence_order.pop('defaults') @@ -278,15 +280,15 @@ class BaseConfigSet(BaseSettings): """Get the dictionary of {key: value} config loaded from the default values""" class OnlyDefaultsConfig(self.__class__): load_from_defaults = True - load_from_configfile = False + load_from_collection = False load_from_environment = False return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) - def from_configfile(self) -> Dict[str, Any]: - """Get the dictionary of {key: value} config loaded from the configfile ArchiveBox.conf""" + def from_collection(self) -> 
Dict[str, Any]: + """Get the dictionary of {key: value} config loaded from the collection ArchiveBox.conf""" class OnlyConfigFileConfig(self.__class__): load_from_defaults = False - load_from_configfile = True + load_from_collection = True load_from_environment = False return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) @@ -294,7 +296,7 @@ class BaseConfigSet(BaseSettings): """Get the dictionary of {key: value} config loaded from the environment variables""" class OnlyEnvironmentConfig(self.__class__): load_from_defaults = False - load_from_configfile = False + load_from_collection = False load_from_environment = True return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) diff --git a/archivebox/abx/archivebox/base_extractor.py b/archivebox/abx/archivebox/base_extractor.py index f78921e0..51dcc8d2 100644 --- a/archivebox/abx/archivebox/base_extractor.py +++ b/archivebox/abx/archivebox/base_extractor.py @@ -4,10 +4,9 @@ import json import os from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple -from typing_extensions import Self from pathlib import Path -from pydantic import model_validator, AfterValidator +from pydantic import AfterValidator from pydantic_pkgr import BinName from django.utils.functional import cached_property from django.utils import timezone @@ -17,36 +16,22 @@ import abx from .base_binary import BaseBinary -def no_empty_args(args: List[str]) -> List[str]: +def assert_no_empty_args(args: List[str]) -> List[str]: assert all(len(arg) for arg in args) return args -ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str +ExtractorName = Annotated[str, AfterValidator(lambda s: s.isidentifier())] HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))] -CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)] +CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)] class BaseExtractor: - name: ExtractorName binary: BinName - output_path_func: HandlerFuncStr = 'self.get_output_path' - should_extract_func: HandlerFuncStr = 'self.should_extract' - extract_func: HandlerFuncStr = 'self.extract' - exec_func: HandlerFuncStr = 'self.exec' - default_args: CmdArgsList = [] extra_args: CmdArgsList = [] - args: Optional[CmdArgsList] = None - - @model_validator(mode='after') - def validate_model(self) -> Self: - if self.args is None: - self.args = [*self.default_args, *self.extra_args] - return self - def get_output_path(self, snapshot) -> Path: return Path(self.__class__.__name__.lower()) @@ -71,7 +56,7 @@ class BaseExtractor: snapshot = Snapshot.objects.get(id=snapshot_id) - if not self.should_extract(snapshot): + if not self.should_extract(snapshot.url): return {} status = 'failed' diff --git a/archivebox/abx/archivebox/reads.py b/archivebox/abx/archivebox/reads.py index d2877ac5..10ad6ecd 100644 --- a/archivebox/abx/archivebox/reads.py +++ b/archivebox/abx/archivebox/reads.py @@ -57,7 +57,7 @@ def get_HOOKS() -> Set[str]: for hook_name in get_PLUGIN(plugin_id).hooks } -def get_CONFIGS() -> Dict[str, 'BaseConfigSet']: +def get_CONFIGS() -> benedict: # Dict[str, 'BaseConfigSet'] return benedict({ config_id: configset for plugin_configs in pm.hook.get_CONFIG() diff --git a/archivebox/abx/archivebox/writes.py b/archivebox/abx/archivebox/writes.py index 0c4566b4..1ca1ac7e 100644 --- 
a/archivebox/abx/archivebox/writes.py +++ b/archivebox/abx/archivebox/writes.py @@ -88,7 +88,7 @@ def create_root_snapshot_from_seed(crawl): def create_archiveresults_pending_from_snapshot(snapshot, config): config = get_scope_config( # defaults=settings.CONFIG_FROM_DEFAULTS, - # configfile=settings.CONFIG_FROM_FILE, + # collection=settings.CONFIG_FROM_FILE, # environment=settings.CONFIG_FROM_ENVIRONMENT, persona=archiveresult.snapshot.crawl.persona, seed=archiveresult.snapshot.crawl.seed, diff --git a/archivebox/config/configfile.py b/archivebox/config/configfile.py index c489e114..911e1559 100644 --- a/archivebox/config/configfile.py +++ b/archivebox/config/configfile.py @@ -15,7 +15,7 @@ from archivebox.misc.logging import stderr def get_real_name(key: str) -> str: - """get the current canonical name for a given deprecated config key""" + """get the up-to-date canonical name for a given old alias or current key""" from django.conf import settings for section in settings.CONFIGS.values(): diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index b8019f99..5124384d 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -1,3 +1,15 @@ +""" +Constants are for things that never change at runtime. +(but they can change from run-to-run or machine-to-machine) + +DATA_DIR will never change at runtime, but you can run +archivebox from inside a different DATA_DIR on the same machine. + +This is loaded very early in the archivebox startup flow, so nothing in this file +or imported from this file should import anything from archivebox.config.common, +django, other INSTALLED_APPS, or anything else that is not in a standard library. +""" + __package__ = 'archivebox.config' import re @@ -197,10 +209,12 @@ class ConstantsDict(Mapping): @classmethod def __getitem__(cls, key: str): + # so it behaves like a dict[key] == dict.key or object attr return getattr(cls, key) @classmethod def __benedict__(cls): + # when casting to benedict, only include uppercase keys that don't start with an underscore return benedict({key: value for key, value in cls.__dict__.items() if key.isupper() and not key.startswith('_')}) @classmethod @@ -214,5 +228,6 @@ class ConstantsDict(Mapping): CONSTANTS = ConstantsDict() CONSTANTS_CONFIG = CONSTANTS.__benedict__() -# add all key: values to globals() for easier importing -globals().update(CONSTANTS) +# add all key: values to globals() for easier importing, e.g.: +# from archivebox.config.constants import IS_ROOT, PERSONAS_DIR, ... 
+# globals().update(CONSTANTS) diff --git a/archivebox/main.py b/archivebox/main.py index 7f196a3c..e05c696d 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -22,7 +22,7 @@ from archivebox.misc.logging import stderr, hint from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR from archivebox.config.common import SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG from archivebox.config.permissions import SudoPermission, IN_DOCKER -from archivebox.config.configfile import ( +from archivebox.config.collection import ( write_config_file, load_all_config, get_real_name, diff --git a/archivebox/misc/util.py b/archivebox/misc/util.py index a856fe64..6f54ada6 100644 --- a/archivebox/misc/util.py +++ b/archivebox/misc/util.py @@ -126,6 +126,7 @@ def is_static_file(url: str): def enforce_types(func): """ Enforce function arg and kwarg types at runtime using its python3 type hints + Simpler version of pydantic @validate_call decorator """ # TODO: check return type as well @@ -283,6 +284,7 @@ def get_headers(url: str, timeout: int=None) -> str: def ansi_to_html(text: str) -> str: """ Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html + Simple way to render colored CLI stdout/stderr in HTML properly, Textual/rich is probably better though. """ TEMPLATE = '
' From b61f6ff8d8b608d03fe10092567585dc6f33f21f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 24 Oct 2024 15:41:43 -0700 Subject: [PATCH 02/25] rename system_tasks queue to commands queue --- archivebox/core/settings.py | 4 +- archivebox/queues/supervisor_util.py | 142 ++++++++++++++------------- archivebox/queues/tasks.py | 12 +-- 3 files changed, 82 insertions(+), 76 deletions(-) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 3810954e..2b9e7edb 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -267,7 +267,7 @@ if not IS_GETTING_VERSION_OR_HELP: # dont create queue.sqlite3 file HUEY = { "huey_class": "huey.SqliteHuey", "filename": CONSTANTS.QUEUE_DATABASE_FILENAME, - "name": "system_tasks", + "name": "commands", "results": True, "store_none": True, "immediate": False, @@ -288,7 +288,7 @@ if not IS_GETTING_VERSION_OR_HELP: # dont create queue.sqlite3 file # https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up # https://github.com/gaiacoop/django-huey DJANGO_HUEY = { - "default": "system_tasks", + "default": "commands", "queues": { HUEY["name"]: HUEY.copy(), # more registered here at plugin import-time by BaseQueue.register() diff --git a/archivebox/queues/supervisor_util.py b/archivebox/queues/supervisor_util.py index f181da08..0a4285f8 100644 --- a/archivebox/queues/supervisor_util.py +++ b/archivebox/queues/supervisor_util.py @@ -26,6 +26,23 @@ CONFIG_FILE_NAME = "supervisord.conf" PID_FILE_NAME = "supervisord.pid" WORKERS_DIR_NAME = "workers" +SCHEDULER_WORKER = { + "name": "worker_scheduler", + "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --disable-health-check --flush-locks", + "autostart": "true", + "autorestart": "true", + "stdout_logfile": "logs/worker_scheduler.log", + "redirect_stderr": "true", +} +COMMAND_WORKER = { + "name": "worker_commands", + "command": "archivebox manage djangohuey --queue commands -w 4 -k thread --no-periodic --disable-health-check", + "autostart": "true", + "autorestart": "true", + "stdout_logfile": "logs/worker_commands.log", + "redirect_stderr": "true", +} + @cache def get_sock_file(): """Get the path to the supervisord socket file, symlinking to a shorter path if needed due to unix path length limits""" @@ -84,33 +101,35 @@ files = {WORKERS_DIR}/*.conf """ CONFIG_FILE.write_text(config_content) - Path.mkdir(WORKERS_DIR, exist_ok=True) + Path.mkdir(WORKERS_DIR, exist_ok=True, parents=True) + (WORKERS_DIR / 'initial_startup.conf').write_text('') # hides error about "no files found to include" when supervisord starts def create_worker_config(daemon): + """Create a supervisord worker config file for a given daemon""" SOCK_FILE = get_sock_file() WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME - Path.mkdir(WORKERS_DIR, exist_ok=True) + Path.mkdir(WORKERS_DIR, exist_ok=True, parents=True) name = daemon['name'] - configfile = WORKERS_DIR / f"{name}.conf" + worker_conf = WORKERS_DIR / f"{name}.conf" - config_content = f"[program:{name}]\n" + worker_str = f"[program:{name}]\n" for key, value in daemon.items(): if key == 'name': continue - config_content += f"{key}={value}\n" - config_content += "\n" + worker_str += f"{key}={value}\n" + worker_str += "\n" - configfile.write_text(config_content) + worker_conf.write_text(worker_str) def get_existing_supervisord_process(): SOCK_FILE = get_sock_file() try: transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}") - server = ServerProxy("http://localhost", transport=transport) + server = 
ServerProxy("http://localhost", transport=transport) # user:pass@localhost doesn't work for some reason with unix://.sock, cant seem to silence CRIT no-auth warning current_state = cast(Dict[str, int | str], server.supervisor.getState()) if current_state["statename"] == "RUNNING": pid = server.supervisor.getPID() @@ -127,6 +146,7 @@ def stop_existing_supervisord_process(): PID_FILE = SOCK_FILE.parent / PID_FILE_NAME try: + # if pid file exists, load PID int try: pid = int(PID_FILE.read_text()) except (FileNotFoundError, ValueError): @@ -136,15 +156,15 @@ def stop_existing_supervisord_process(): print(f"[πŸ¦Έβ€β™‚οΈ] Stopping supervisord process (pid={pid})...") proc = psutil.Process(pid) proc.terminate() - proc.wait() - except (Exception, BrokenPipeError, IOError): + proc.wait(timeout=5) + except (BaseException, BrokenPipeError, IOError, KeyboardInterrupt): pass finally: try: # clear PID file and socket file PID_FILE.unlink(missing_ok=True) get_sock_file().unlink(missing_ok=True) - except Exception: + except BaseException: pass def start_new_supervisord_process(daemonize=False): @@ -278,47 +298,6 @@ def start_worker(supervisor, daemon, lazy=False): raise Exception(f"Failed to start worker {daemon['name']}! Only found: {procs}") -def watch_worker(supervisor, daemon_name, interval=5): - """loop continuously and monitor worker's health""" - while True: - proc = get_worker(supervisor, daemon_name) - if not proc: - raise Exception("Worker dissapeared while running! " + daemon_name) - - if proc['statename'] == 'STOPPED': - return proc - - if proc['statename'] == 'RUNNING': - time.sleep(1) - continue - - if proc['statename'] in ('STARTING', 'BACKOFF', 'FATAL', 'EXITED', 'STOPPING'): - print(f'[πŸ¦Έβ€β™‚οΈ] WARNING: Worker {daemon_name} {proc["statename"]} {proc["description"]}') - time.sleep(interval) - continue - -def tail_worker_logs(log_path: str): - get_or_create_supervisord_process(daemonize=False) - - from rich.live import Live - from rich.table import Table - - table = Table() - table.add_column("TS") - table.add_column("URL") - - try: - with Live(table, refresh_per_second=1) as live: # update 4 times a second to feel fluid - with open(log_path, 'r') as f: - for line in follow(f): - if '://' in line: - live.console.print(f"Working on: {line.strip()}") - # table.add_row("123124234", line.strip()) - except (KeyboardInterrupt, BrokenPipeError, IOError): - STDERR.print("\n[πŸ›‘] Got Ctrl+C, stopping gracefully...") - except SystemExit: - pass - def get_worker(supervisor, daemon_name): try: return supervisor.getProcessInfo(daemon_name) @@ -350,28 +329,55 @@ def stop_worker(supervisor, daemon_name): raise Exception(f"Failed to stop worker {daemon_name}!") +def tail_worker_logs(log_path: str): + get_or_create_supervisord_process(daemonize=False) + + from rich.live import Live + from rich.table import Table + + table = Table() + table.add_column("TS") + table.add_column("URL") + + try: + with Live(table, refresh_per_second=1) as live: # update 4 times a second to feel fluid + with open(log_path, 'r') as f: + for line in follow(f): + if '://' in line: + live.console.print(f"Working on: {line.strip()}") + # table.add_row("123124234", line.strip()) + except (KeyboardInterrupt, BrokenPipeError, IOError): + STDERR.print("\n[πŸ›‘] Got Ctrl+C, stopping gracefully...") + except SystemExit: + pass + +def watch_worker(supervisor, daemon_name, interval=5): + """loop continuously and monitor worker's health""" + while True: + proc = get_worker(supervisor, daemon_name) + if not proc: + raise 
Exception("Worker dissapeared while running! " + daemon_name) + + if proc['statename'] == 'STOPPED': + return proc + + if proc['statename'] == 'RUNNING': + time.sleep(1) + continue + + if proc['statename'] in ('STARTING', 'BACKOFF', 'FATAL', 'EXITED', 'STOPPING'): + print(f'[πŸ¦Έβ€β™‚οΈ] WARNING: Worker {daemon_name} {proc["statename"]} {proc["description"]}') + time.sleep(interval) + continue + def start_server_workers(host='0.0.0.0', port='8000', daemonize=False): supervisor = get_or_create_supervisord_process(daemonize=daemonize) bg_workers = [ - { - "name": "worker_scheduler", - "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --disable-health-check --flush-locks", - "autostart": "true", - "autorestart": "true", - "stdout_logfile": "logs/worker_scheduler.log", - "redirect_stderr": "true", - }, - { - "name": "worker_system_tasks", - "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --no-periodic --disable-health-check", - "autostart": "true", - "autorestart": "true", - "stdout_logfile": "logs/worker_system_tasks.log", - "redirect_stderr": "true", - }, + SCHEDULER_WORKER, + COMMAND_WORKER, ] fg_worker = { "name": "worker_daphne", diff --git a/archivebox/queues/tasks.py b/archivebox/queues/tasks.py index acfeab0b..6f62a8c1 100644 --- a/archivebox/queues/tasks.py +++ b/archivebox/queues/tasks.py @@ -1,7 +1,7 @@ __package__ = 'archivebox.queues' from functools import wraps -from django.utils import timezone +# from django.utils import timezone from django_huey import db_task, task @@ -10,7 +10,7 @@ from huey_monitor.tqdm import ProcessInfo from .supervisor_util import get_or_create_supervisord_process -# @db_task(queue="system_tasks", context=True, schedule=1) +# @db_task(queue="commands", context=True, schedule=1) # def scheduler_tick(): # print('SCHEDULER TICK', timezone.now().isoformat()) # # abx.archivebox.events.on_scheduler_runloop_start(timezone.now(), machine=Machine.objects.get_current_machine()) @@ -43,7 +43,7 @@ def db_task_with_parent(func): return wrapper -@db_task(queue="system_tasks", context=True) +@db_task(queue="commands", context=True) def bg_add(add_kwargs, task=None, parent_task_id=None): get_or_create_supervisord_process(daemonize=False) @@ -62,7 +62,7 @@ def bg_add(add_kwargs, task=None, parent_task_id=None): return result -@task(queue="system_tasks", context=True) +@task(queue="commands", context=True) def bg_archive_links(args, kwargs=None, task=None, parent_task_id=None): get_or_create_supervisord_process(daemonize=False) @@ -83,7 +83,7 @@ def bg_archive_links(args, kwargs=None, task=None, parent_task_id=None): return result -@task(queue="system_tasks", context=True) +@task(queue="commands", context=True) def bg_archive_link(args, kwargs=None,task=None, parent_task_id=None): get_or_create_supervisord_process(daemonize=False) @@ -104,7 +104,7 @@ def bg_archive_link(args, kwargs=None,task=None, parent_task_id=None): return result -@task(queue="system_tasks", context=True) +@task(queue="commands", context=True) def bg_archive_snapshot(snapshot, overwrite=False, methods=None, task=None, parent_task_id=None): # get_or_create_supervisord_process(daemonize=False) From 4b6f08b0fe1d93a79772dd1d4d7fdf0d4f409432 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 24 Oct 2024 15:42:19 -0700 Subject: [PATCH 03/25] swap more direct settings.CONFIG access to abx getters --- archivebox/index/json.py | 7 +++---- archivebox/main.py | 9 +++++---- archivebox/misc/util.py | 28 ++++++++++++++++++---------- 3 files changed, 26 
insertions(+), 18 deletions(-) diff --git a/archivebox/index/json.py b/archivebox/index/json.py index 017dbc94..eaa93c2e 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -8,6 +8,8 @@ from pathlib import Path from datetime import datetime, timezone from typing import List, Optional, Iterator, Any, Union +import abx.archivebox.reads + from archivebox.config import VERSION, DATA_DIR, CONSTANTS from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG @@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types @enforce_types def generate_json_index_from_links(links: List[Link], with_headers: bool): - from django.conf import settings - MAIN_INDEX_HEADER = { 'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.', 'schema': 'archivebox.index.json', @@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool): 'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki', 'source': 'https://github.com/ArchiveBox/ArchiveBox', 'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues', - 'dependencies': settings.BINARIES.to_dict(), + 'dependencies': dict(abx.archivebox.reads.get_BINARIES()), }, } - if with_headers: output = { **MAIN_INDEX_HEADER, diff --git a/archivebox/main.py b/archivebox/main.py index e05c696d..fab99dc9 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -1052,7 +1052,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina from rich import print from django.conf import settings - from archivebox import CONSTANTS + + import abx.archivebox.reads from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP from archivebox.config.paths import get_or_create_working_lib_dir @@ -1075,11 +1076,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina package_manager_names = ', '.join( f'[yellow]{binprovider.name}[/yellow]' - for binprovider in reversed(list(settings.BINPROVIDERS.values())) + for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())) if not binproviders or (binproviders and binprovider.name in binproviders) ) print(f'[+] Setting up package managers {package_manager_names}...') - for binprovider in reversed(list(settings.BINPROVIDERS.values())): + for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())): if binproviders and binprovider.name not in binproviders: continue try: @@ -1092,7 +1093,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina print() - for binary in reversed(list(settings.BINARIES.values())): + for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())): if binary.name in ('archivebox', 'django', 'sqlite', 'python'): # obviously must already be installed if we are running continue diff --git a/archivebox/misc/util.py b/archivebox/misc/util.py index 6f54ada6..6195252e 100644 --- a/archivebox/misc/util.py +++ b/archivebox/misc/util.py @@ -5,7 +5,7 @@ import requests import json as pyjson import http.cookiejar -from typing import List, Optional, Any +from typing import List, Optional, Any, Callable from pathlib import Path from inspect import signature from functools import wraps @@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout from base32_crockford import encode as base32_encode # type: ignore from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding try: - import chardet + import chardet # type:ignore 
detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"] except ImportError: detect_encoding = lambda rawdata: "utf-8" -from archivebox.config import CONSTANTS -from archivebox.config.common import ARCHIVING_CONFIG +from archivebox.config.constants import CONSTANTS from .logging import COLOR_DICT @@ -187,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str: @enforce_types -def parse_date(date: Any) -> Optional[datetime]: +def parse_date(date: Any) -> datetime: """Parse unix timestamps, iso format, and human-readable strings""" if date is None: - return None + return None # type: ignore if isinstance(date, datetime): if date.tzinfo is None: @@ -213,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]: def download_url(url: str, timeout: int=None) -> str: """Download the contents of a remote url and return the text""" + from archivebox.config.common import ARCHIVING_CONFIG + timeout = timeout or ARCHIVING_CONFIG.TIMEOUT session = requests.Session() @@ -242,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str: return url.rsplit('/', 1)[-1] @enforce_types -def get_headers(url: str, timeout: int=None) -> str: +def get_headers(url: str, timeout: int | None=None) -> str: """Download the contents of a remote url and return the headers""" + # TODO: get rid of this and use an abx pluggy hook instead + + from archivebox.config.common import ARCHIVING_CONFIG + timeout = timeout or ARCHIVING_CONFIG.TIMEOUT try: @@ -308,13 +313,13 @@ def ansi_to_html(text: str) -> str: @enforce_types def dedupe(options: List[str]) -> List[str]: """ - Deduplicates the given options. Options that come later clobber earlier - conflicting options. + Deduplicates the given CLI args by key=value. Options that come later override earlier. 
""" deduped = {} for option in options: - deduped[option.split('=')[0]] = option + key = option.split('=')[0] + deduped[key] = option return list(deduped.values()) @@ -346,6 +351,9 @@ class ExtendedEncoder(pyjson.JSONEncoder): elif cls_name in ('dict_items', 'dict_keys', 'dict_values'): return tuple(obj) + + elif isinstance(obj, Callable): + return str(obj) return pyjson.JSONEncoder.default(self, obj) From 5d9a32c36495c7cbc6a612205d3c2b6c7dd85505 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 25 Oct 2024 01:06:12 -0700 Subject: [PATCH 04/25] wip --- archivebox/abx/__init__.py | 131 --- archivebox/abx/archivebox/__init__.py | 30 - archivebox/abx/archivebox/base_binary.py | 117 --- archivebox/abx/archivebox/base_extractor.py | 204 ---- archivebox/abx/archivebox/base_replayer.py | 25 - archivebox/abx/archivebox/hookspec.py | 52 - archivebox/abx/archivebox/reads.py | 160 --- archivebox/abx/django/__init__.py | 1 - archivebox/abx/django/use.py | 101 -- archivebox/abx/hookspec.py | 22 - archivebox/abx/manager.py | 30 - archivebox/abx/pydantic_pkgr/__init__.py | 1 - archivebox/abx/pydantic_pkgr/hookspec.py | 13 - archivebox/core/settings.py | 61 +- archivebox/plugins_pkg/npm/binproviders.py | 42 - archivebox/vendor/__init__.py | 4 +- archivebox/vendor/pocket | 1 - archivebox/vendor/pydantic-pkgr | 1 - click_test.py | 32 + .../README.md | 0 .../__init__.py | 0 .../config.py | 0 .../pyproject.toml | 7 + .../abx-plugin-chrome-extractor/README.md | 0 .../abx-plugin-chrome-extractor}/__init__.py | 0 .../abx-plugin-chrome-extractor}/binaries.py | 37 +- .../abx-plugin-chrome-extractor}/config.py | 0 .../pyproject.toml | 7 + .../abx-plugin-curl-extractor/README.md | 0 .../abx-plugin-curl-extractor}/__init__.py | 0 .../abx-plugin-curl-extractor}/binaries.py | 0 .../abx-plugin-curl-extractor}/config.py | 0 .../abx-plugin-curl-extractor/pyproject.toml | 7 + .../abx-plugin-default-binproviders/README.md | 0 .../abx_plugin_default_binproviders.py | 24 + .../pyproject.toml | 18 + .../abx-plugin-favicon-extractor/README.md | 0 .../abx-plugin-favicon-extractor}/__init__.py | 0 .../abx-plugin-favicon-extractor}/config.py | 0 .../pyproject.toml | 7 + packages/abx-plugin-git-extractor/README.md | 0 .../abx-plugin-git-extractor}/__init__.py | 0 .../abx-plugin-git-extractor}/binaries.py | 0 .../abx-plugin-git-extractor}/config.py | 0 .../abx-plugin-git-extractor}/extractors.py | 0 .../abx-plugin-git-extractor/pyproject.toml | 7 + .../abx-plugin-htmltotext-extractor/README.md | 0 .../__init__.py | 0 .../config.py | 0 .../pyproject.toml | 7 + packages/abx-plugin-ldap-auth/README.md | 0 .../abx-plugin-ldap-auth}/__init__.py | 0 .../abx-plugin-ldap-auth}/binaries.py | 0 .../abx-plugin-ldap-auth}/config.py | 0 packages/abx-plugin-ldap-auth/pyproject.toml | 22 + .../abx-plugin-mercury-extractor/README.md | 0 .../abx-plugin-mercury-extractor}/__init__.py | 0 .../abx-plugin-mercury-extractor}/binaries.py | 0 .../abx-plugin-mercury-extractor}/config.py | 0 .../extractors.py | 0 .../pyproject.toml | 7 + packages/abx-plugin-npm-binprovider/README.md | 0 .../abx_plugin_npm_binprovider}/__init__.py | 18 +- .../abx_plugin_npm_binprovider}/binaries.py | 19 +- .../binproviders.py | 39 + .../abx_plugin_npm_binprovider}/config.py | 5 +- .../abx-plugin-npm-binprovider/pyproject.toml | 20 + packages/abx-plugin-pip-binprovider/README.md | 0 .../abx_plugin_pip_binprovider}/.plugin_order | 0 .../abx_plugin_pip_binprovider}/__init__.py | 24 +- .../abx_plugin_pip_binprovider}/binaries.py | 49 +- .../binproviders.py | 31 +- 
.../abx_plugin_pip_binprovider}/config.py | 0 .../abx-plugin-pip-binprovider/pyproject.toml | 22 + .../README.md | 0 .../__init__.py | 20 +- .../binaries.py | 12 +- .../binproviders.py | 23 +- .../config.py | 5 +- .../pyproject.toml | 20 + .../abx-plugin-pocket-extractor/README.md | 0 .../abx-plugin-pocket-extractor}/__init__.py | 0 .../abx-plugin-pocket-extractor}/config.py | 0 .../pyproject.toml | 7 + .../README.md | 0 .../__init__.py | 0 .../binaries.py | 0 .../binproviders.py | 3 +- .../config.py | 0 .../pyproject.toml | 7 + .../README.md | 0 .../__init__.py | 0 .../binaries.py | 0 .../config.py | 0 .../extractors.py | 0 .../pyproject.toml | 7 + .../abx-plugin-readwise-extractor/README.md | 0 .../__init__.py | 0 .../abx-plugin-readwise-extractor}/config.py | 0 .../pyproject.toml | 7 + packages/abx-plugin-ripgrep-search/README.md | 0 .../abx-plugin-ripgrep-search}/__init__.py | 0 .../abx-plugin-ripgrep-search}/binaries.py | 0 .../abx-plugin-ripgrep-search}/config.py | 0 .../abx-plugin-ripgrep-search/pyproject.toml | 7 + .../searchbackend.py | 0 .../abx-plugin-singlefile-extractor/README.md | 0 .../__init__.py | 0 .../binaries.py | 0 .../config.py | 0 .../extractors.py | 0 .../models.py | 0 .../pyproject.toml | 7 + packages/abx-plugin-sonic-search/README.md | 0 .../abx-plugin-sonic-search}/__init__.py | 0 .../abx-plugin-sonic-search}/binaries.py | 0 .../abx-plugin-sonic-search}/config.py | 0 .../abx-plugin-sonic-search/pyproject.toml | 7 + .../abx-plugin-sonic-search}/searchbackend.py | 0 .../abx-plugin-sqlitefts-search/README.md | 0 .../abx-plugin-sqlitefts-search}/__init__.py | 0 .../abx-plugin-sqlitefts-search}/config.py | 0 .../pyproject.toml | 7 + .../searchbackend.py | 0 packages/abx-plugin-wget-extractor/README.md | 0 .../abx-plugin-wget-extractor}/__init__.py | 0 .../abx-plugin-wget-extractor}/binaries.py | 0 .../abx-plugin-wget-extractor}/config.py | 0 .../abx-plugin-wget-extractor}/extractors.py | 0 .../abx-plugin-wget-extractor/pyproject.toml | 7 + .../abx-plugin-wget-extractor}/wget_util.py | 0 packages/abx-plugin-ytdlp-extractor/README.md | 0 .../abx-plugin-ytdlp-extractor}/__init__.py | 0 .../abx-plugin-ytdlp-extractor}/binaries.py | 0 .../abx-plugin-ytdlp-extractor}/config.py | 0 .../abx-plugin-ytdlp-extractor/pyproject.toml | 7 + packages/abx-spec-archivebox/README.md | 0 .../abx_spec_archivebox/__init__.py | 7 + .../abx_spec_archivebox}/effects.py | 0 .../abx_spec_archivebox}/events.py | 0 .../abx_spec_archivebox/reads.py | 33 + .../abx_spec_archivebox}/states.py | 0 .../abx_spec_archivebox}/writes.py | 0 packages/abx-spec-archivebox/pyproject.toml | 17 + .../abx_spec_config/__init__.py | 50 + .../abx_spec_config}/base_configset.py | 151 +-- .../abx_spec_config}/toml_util.py | 0 packages/abx-spec-config/pyproject.toml | 17 + packages/abx-spec-django/README.md | 0 .../abx_spec_django/__init__.py | 75 +- .../abx-spec-django/abx_spec_django}/apps.py | 7 +- packages/abx-spec-django/pyproject.toml | 17 + packages/abx-spec-extractor/README.md | 0 .../abx-spec-extractor/abx_spec_extractor.py | 211 ++++ packages/abx-spec-extractor/pyproject.toml | 18 + packages/abx-spec-pydantic-pkgr/README.md | 0 .../abx_spec_pydantic_pkgr.py | 72 ++ .../abx-spec-pydantic-pkgr/pyproject.toml | 17 + packages/abx-spec-searchbackend/README.md | 0 .../abx_spec_searchbackend.py | 10 +- .../abx-spec-searchbackend/pyproject.toml | 18 + packages/abx/README.md | 0 packages/abx/abx.py | 344 +++++++ packages/abx/pyproject.toml | 14 + .../archivebox-pocket/.circleci/config.yml | 61 ++ 
packages/archivebox-pocket/.gitignore | 43 + packages/archivebox-pocket/LICENSE.md | 27 + packages/archivebox-pocket/MANIFEST.in | 2 + packages/archivebox-pocket/README.md | 66 ++ packages/archivebox-pocket/pocket.py | 366 +++++++ packages/archivebox-pocket/pyproject.toml | 19 + packages/archivebox-pocket/requirements.txt | 4 + packages/archivebox-pocket/setup.py | 41 + packages/archivebox-pocket/test_pocket.py | 52 + packages/pydantic-pkgr | 1 + pyproject.toml | 45 +- requirements.txt | 8 +- uv.lock | 933 ++++++++++++++++-- 178 files changed, 2982 insertions(+), 1322 deletions(-) delete mode 100644 archivebox/abx/__init__.py delete mode 100644 archivebox/abx/archivebox/__init__.py delete mode 100644 archivebox/abx/archivebox/base_binary.py delete mode 100644 archivebox/abx/archivebox/base_extractor.py delete mode 100644 archivebox/abx/archivebox/base_replayer.py delete mode 100644 archivebox/abx/archivebox/hookspec.py delete mode 100644 archivebox/abx/archivebox/reads.py delete mode 100644 archivebox/abx/django/__init__.py delete mode 100644 archivebox/abx/django/use.py delete mode 100644 archivebox/abx/hookspec.py delete mode 100644 archivebox/abx/manager.py delete mode 100644 archivebox/abx/pydantic_pkgr/__init__.py delete mode 100644 archivebox/abx/pydantic_pkgr/hookspec.py delete mode 100644 archivebox/plugins_pkg/npm/binproviders.py delete mode 160000 archivebox/vendor/pocket delete mode 160000 archivebox/vendor/pydantic-pkgr create mode 100644 click_test.py rename archivebox/plugins_auth/__init__.py => packages/abx-plugin-archivedotorg-extractor/README.md (100%) rename {archivebox/plugins_extractor/archivedotorg => packages/abx-plugin-archivedotorg-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/archivedotorg => packages/abx-plugin-archivedotorg-extractor}/config.py (100%) create mode 100644 packages/abx-plugin-archivedotorg-extractor/pyproject.toml rename archivebox/plugins_extractor/__init__.py => packages/abx-plugin-chrome-extractor/README.md (100%) rename {archivebox/plugins_extractor/chrome => packages/abx-plugin-chrome-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/chrome => packages/abx-plugin-chrome-extractor}/binaries.py (84%) rename {archivebox/plugins_extractor/chrome => packages/abx-plugin-chrome-extractor}/config.py (100%) create mode 100644 packages/abx-plugin-chrome-extractor/pyproject.toml rename archivebox/plugins_pkg/__init__.py => packages/abx-plugin-curl-extractor/README.md (100%) rename {archivebox/plugins_extractor/curl => packages/abx-plugin-curl-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/curl => packages/abx-plugin-curl-extractor}/binaries.py (100%) rename {archivebox/plugins_extractor/curl => packages/abx-plugin-curl-extractor}/config.py (100%) create mode 100644 packages/abx-plugin-curl-extractor/pyproject.toml rename archivebox/plugins_search/__init__.py => packages/abx-plugin-default-binproviders/README.md (100%) create mode 100644 packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py create mode 100644 packages/abx-plugin-default-binproviders/pyproject.toml create mode 100644 packages/abx-plugin-favicon-extractor/README.md rename {archivebox/plugins_extractor/favicon => packages/abx-plugin-favicon-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/favicon => packages/abx-plugin-favicon-extractor}/config.py (100%) create mode 100644 packages/abx-plugin-favicon-extractor/pyproject.toml create mode 100644 packages/abx-plugin-git-extractor/README.md rename 
{archivebox/plugins_extractor/git => packages/abx-plugin-git-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/git => packages/abx-plugin-git-extractor}/binaries.py (100%) rename {archivebox/plugins_extractor/git => packages/abx-plugin-git-extractor}/config.py (100%) rename {archivebox/plugins_extractor/git => packages/abx-plugin-git-extractor}/extractors.py (100%) create mode 100644 packages/abx-plugin-git-extractor/pyproject.toml create mode 100644 packages/abx-plugin-htmltotext-extractor/README.md rename {archivebox/plugins_extractor/htmltotext => packages/abx-plugin-htmltotext-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/htmltotext => packages/abx-plugin-htmltotext-extractor}/config.py (100%) create mode 100644 packages/abx-plugin-htmltotext-extractor/pyproject.toml create mode 100644 packages/abx-plugin-ldap-auth/README.md rename {archivebox/plugins_auth/ldap => packages/abx-plugin-ldap-auth}/__init__.py (100%) rename {archivebox/plugins_auth/ldap => packages/abx-plugin-ldap-auth}/binaries.py (100%) rename {archivebox/plugins_auth/ldap => packages/abx-plugin-ldap-auth}/config.py (100%) create mode 100644 packages/abx-plugin-ldap-auth/pyproject.toml create mode 100644 packages/abx-plugin-mercury-extractor/README.md rename {archivebox/plugins_extractor/mercury => packages/abx-plugin-mercury-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/mercury => packages/abx-plugin-mercury-extractor}/binaries.py (100%) rename {archivebox/plugins_extractor/mercury => packages/abx-plugin-mercury-extractor}/config.py (100%) rename {archivebox/plugins_extractor/mercury => packages/abx-plugin-mercury-extractor}/extractors.py (100%) create mode 100644 packages/abx-plugin-mercury-extractor/pyproject.toml create mode 100644 packages/abx-plugin-npm-binprovider/README.md rename {archivebox/plugins_pkg/npm => packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider}/__init__.py (63%) rename {archivebox/plugins_pkg/npm => packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider}/binaries.py (72%) create mode 100644 packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py rename {archivebox/plugins_pkg/npm => packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider}/config.py (79%) create mode 100644 packages/abx-plugin-npm-binprovider/pyproject.toml create mode 100644 packages/abx-plugin-pip-binprovider/README.md rename {archivebox/plugins_pkg/pip => packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider}/.plugin_order (100%) rename {archivebox/plugins_pkg/pip => packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider}/__init__.py (62%) rename {archivebox/plugins_pkg/pip => packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider}/binaries.py (84%) rename {archivebox/plugins_pkg/pip => packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider}/binproviders.py (76%) rename {archivebox/plugins_pkg/pip => packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider}/config.py (100%) create mode 100644 packages/abx-plugin-pip-binprovider/pyproject.toml create mode 100644 packages/abx-plugin-playwright-binprovider/README.md rename {archivebox/plugins_pkg/playwright => packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider}/__init__.py (56%) rename {archivebox/plugins_pkg/playwright => packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider}/binaries.py (52%) rename {archivebox/plugins_pkg/playwright => 
packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider}/binproviders.py (90%) rename {archivebox/plugins_pkg/playwright => packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider}/config.py (59%) create mode 100644 packages/abx-plugin-playwright-binprovider/pyproject.toml create mode 100644 packages/abx-plugin-pocket-extractor/README.md rename {archivebox/plugins_extractor/pocket => packages/abx-plugin-pocket-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/pocket => packages/abx-plugin-pocket-extractor}/config.py (100%) create mode 100644 packages/abx-plugin-pocket-extractor/pyproject.toml create mode 100644 packages/abx-plugin-puppeteer-binprovider/README.md rename {archivebox/plugins_pkg/puppeteer => packages/abx-plugin-puppeteer-binprovider}/__init__.py (100%) rename {archivebox/plugins_pkg/puppeteer => packages/abx-plugin-puppeteer-binprovider}/binaries.py (100%) rename {archivebox/plugins_pkg/puppeteer => packages/abx-plugin-puppeteer-binprovider}/binproviders.py (96%) rename {archivebox/plugins_pkg/puppeteer => packages/abx-plugin-puppeteer-binprovider}/config.py (100%) create mode 100644 packages/abx-plugin-puppeteer-binprovider/pyproject.toml create mode 100644 packages/abx-plugin-readability-extractor/README.md rename {archivebox/plugins_extractor/readability => packages/abx-plugin-readability-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/readability => packages/abx-plugin-readability-extractor}/binaries.py (100%) rename {archivebox/plugins_extractor/readability => packages/abx-plugin-readability-extractor}/config.py (100%) rename {archivebox/plugins_extractor/readability => packages/abx-plugin-readability-extractor}/extractors.py (100%) create mode 100644 packages/abx-plugin-readability-extractor/pyproject.toml create mode 100644 packages/abx-plugin-readwise-extractor/README.md rename {archivebox/plugins_extractor/readwise => packages/abx-plugin-readwise-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/readwise => packages/abx-plugin-readwise-extractor}/config.py (100%) create mode 100644 packages/abx-plugin-readwise-extractor/pyproject.toml create mode 100644 packages/abx-plugin-ripgrep-search/README.md rename {archivebox/plugins_search/ripgrep => packages/abx-plugin-ripgrep-search}/__init__.py (100%) rename {archivebox/plugins_search/ripgrep => packages/abx-plugin-ripgrep-search}/binaries.py (100%) rename {archivebox/plugins_search/ripgrep => packages/abx-plugin-ripgrep-search}/config.py (100%) create mode 100644 packages/abx-plugin-ripgrep-search/pyproject.toml rename {archivebox/plugins_search/ripgrep => packages/abx-plugin-ripgrep-search}/searchbackend.py (100%) create mode 100644 packages/abx-plugin-singlefile-extractor/README.md rename {archivebox/plugins_extractor/singlefile => packages/abx-plugin-singlefile-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/singlefile => packages/abx-plugin-singlefile-extractor}/binaries.py (100%) rename {archivebox/plugins_extractor/singlefile => packages/abx-plugin-singlefile-extractor}/config.py (100%) rename {archivebox/plugins_extractor/singlefile => packages/abx-plugin-singlefile-extractor}/extractors.py (100%) rename {archivebox/plugins_extractor/singlefile => packages/abx-plugin-singlefile-extractor}/models.py (100%) create mode 100644 packages/abx-plugin-singlefile-extractor/pyproject.toml create mode 100644 packages/abx-plugin-sonic-search/README.md rename {archivebox/plugins_search/sonic => 
packages/abx-plugin-sonic-search}/__init__.py (100%) rename {archivebox/plugins_search/sonic => packages/abx-plugin-sonic-search}/binaries.py (100%) rename {archivebox/plugins_search/sonic => packages/abx-plugin-sonic-search}/config.py (100%) create mode 100644 packages/abx-plugin-sonic-search/pyproject.toml rename {archivebox/plugins_search/sonic => packages/abx-plugin-sonic-search}/searchbackend.py (100%) create mode 100644 packages/abx-plugin-sqlitefts-search/README.md rename {archivebox/plugins_search/sqlitefts => packages/abx-plugin-sqlitefts-search}/__init__.py (100%) rename {archivebox/plugins_search/sqlitefts => packages/abx-plugin-sqlitefts-search}/config.py (100%) create mode 100644 packages/abx-plugin-sqlitefts-search/pyproject.toml rename {archivebox/plugins_search/sqlitefts => packages/abx-plugin-sqlitefts-search}/searchbackend.py (100%) create mode 100644 packages/abx-plugin-wget-extractor/README.md rename {archivebox/plugins_extractor/wget => packages/abx-plugin-wget-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/wget => packages/abx-plugin-wget-extractor}/binaries.py (100%) rename {archivebox/plugins_extractor/wget => packages/abx-plugin-wget-extractor}/config.py (100%) rename {archivebox/plugins_extractor/wget => packages/abx-plugin-wget-extractor}/extractors.py (100%) create mode 100644 packages/abx-plugin-wget-extractor/pyproject.toml rename {archivebox/plugins_extractor/wget => packages/abx-plugin-wget-extractor}/wget_util.py (100%) create mode 100644 packages/abx-plugin-ytdlp-extractor/README.md rename {archivebox/plugins_extractor/ytdlp => packages/abx-plugin-ytdlp-extractor}/__init__.py (100%) rename {archivebox/plugins_extractor/ytdlp => packages/abx-plugin-ytdlp-extractor}/binaries.py (100%) rename {archivebox/plugins_extractor/ytdlp => packages/abx-plugin-ytdlp-extractor}/config.py (100%) create mode 100644 packages/abx-plugin-ytdlp-extractor/pyproject.toml create mode 100644 packages/abx-spec-archivebox/README.md create mode 100644 packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py rename {archivebox/abx/archivebox => packages/abx-spec-archivebox/abx_spec_archivebox}/effects.py (100%) rename {archivebox/abx/archivebox => packages/abx-spec-archivebox/abx_spec_archivebox}/events.py (100%) create mode 100644 packages/abx-spec-archivebox/abx_spec_archivebox/reads.py rename {archivebox/abx/archivebox => packages/abx-spec-archivebox/abx_spec_archivebox}/states.py (100%) rename {archivebox/abx/archivebox => packages/abx-spec-archivebox/abx_spec_archivebox}/writes.py (100%) create mode 100644 packages/abx-spec-archivebox/pyproject.toml create mode 100644 packages/abx-spec-config/abx_spec_config/__init__.py rename {archivebox/abx/archivebox => packages/abx-spec-config/abx_spec_config}/base_configset.py (73%) rename {archivebox/abx/archivebox => packages/abx-spec-config/abx_spec_config}/toml_util.py (100%) create mode 100644 packages/abx-spec-config/pyproject.toml create mode 100644 packages/abx-spec-django/README.md rename archivebox/abx/django/hookspec.py => packages/abx-spec-django/abx_spec_django/__init__.py (79%) rename {archivebox/abx/django => packages/abx-spec-django/abx_spec_django}/apps.py (71%) create mode 100644 packages/abx-spec-django/pyproject.toml create mode 100644 packages/abx-spec-extractor/README.md create mode 100644 packages/abx-spec-extractor/abx_spec_extractor.py create mode 100644 packages/abx-spec-extractor/pyproject.toml create mode 100644 packages/abx-spec-pydantic-pkgr/README.md create mode 100644 
packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py create mode 100644 packages/abx-spec-pydantic-pkgr/pyproject.toml create mode 100644 packages/abx-spec-searchbackend/README.md rename archivebox/abx/archivebox/base_searchbackend.py => packages/abx-spec-searchbackend/abx_spec_searchbackend.py (73%) create mode 100644 packages/abx-spec-searchbackend/pyproject.toml create mode 100644 packages/abx/README.md create mode 100644 packages/abx/abx.py create mode 100644 packages/abx/pyproject.toml create mode 100644 packages/archivebox-pocket/.circleci/config.yml create mode 100644 packages/archivebox-pocket/.gitignore create mode 100644 packages/archivebox-pocket/LICENSE.md create mode 100644 packages/archivebox-pocket/MANIFEST.in create mode 100644 packages/archivebox-pocket/README.md create mode 100644 packages/archivebox-pocket/pocket.py create mode 100644 packages/archivebox-pocket/pyproject.toml create mode 100644 packages/archivebox-pocket/requirements.txt create mode 100644 packages/archivebox-pocket/setup.py create mode 100644 packages/archivebox-pocket/test_pocket.py create mode 160000 packages/pydantic-pkgr diff --git a/archivebox/abx/__init__.py b/archivebox/abx/__init__.py deleted file mode 100644 index c571a2e3..00000000 --- a/archivebox/abx/__init__.py +++ /dev/null @@ -1,131 +0,0 @@ -__package__ = 'abx' - -import importlib -from pathlib import Path -from typing import Dict, Callable, List - -from . import hookspec as base_spec -from abx.hookspec import hookimpl, hookspec # noqa -from abx.manager import pm, PluginManager # noqa - - -pm.add_hookspecs(base_spec) - - -###### PLUGIN DISCOVERY AND LOADING ######################################################## - -def get_plugin_order(plugin_entrypoint: Path): - order = 999 - try: - # if .plugin_order file exists, use it to set the load priority - order = int((plugin_entrypoint.parent / '.plugin_order').read_text()) - except FileNotFoundError: - pass - return (order, plugin_entrypoint) - -def register_hookspecs(hookspecs: List[str]): - """ - Register all the hookspecs from a list of module names. - """ - for hookspec_import_path in hookspecs: - hookspec_module = importlib.import_module(hookspec_import_path) - pm.add_hookspecs(hookspec_module) - - -def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]: - """ - Find all the plugins in a given directory. Just looks for an __init__.py file. - """ - return { - f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent - for plugin_entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=get_plugin_order) - if plugin_entrypoint.parent.name != 'abx' - } # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip" - - -def get_pip_installed_plugins(group='abx'): - """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip""" - import importlib.metadata - - DETECTED_PLUGINS = {} # module_name: module_dir_path - for dist in list(importlib.metadata.distributions()): - for entrypoint in dist.entry_points: - if entrypoint.group != group or pm.is_blocked(entrypoint.name): - continue - DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent - # pm.register(plugin, name=ep.name) - # pm._plugin_distinfo.append((plugin, DistFacade(dist))) - return DETECTED_PLUGINS - - -def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]): - """ - Get the mapping of dir_name: {plugin_id: plugin_dir} for all plugins in the given directories. 
- """ - DETECTED_PLUGINS = {} - for plugin_prefix, plugin_dir in plugin_dirs.items(): - DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix)) - return DETECTED_PLUGINS - - -# Load all plugins from pip packages, archivebox built-ins, and user plugins - -def load_plugins(plugins_dict: Dict[str, Path]): - """ - Load all the plugins from a dictionary of module names and directory paths. - """ - LOADED_PLUGINS = {} - for plugin_module, plugin_dir in plugins_dict.items(): - # print(f'Loading plugin: {plugin_module} from {plugin_dir}') - plugin_module_loaded = importlib.import_module(plugin_module) - pm.register(plugin_module_loaded) - LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN - # print(f' √ Loaded plugin: {plugin_module}') - return LOADED_PLUGINS - -def get_registered_plugins(): - """ - Get all the plugins registered with Pluggy. - """ - plugins = {} - plugin_to_distinfo = dict(pm.list_plugin_distinfo()) - for plugin in pm.get_plugins(): - plugin_info = { - "name": plugin.__name__, - "hooks": [h.name for h in pm.get_hookcallers(plugin) or ()], - } - distinfo = plugin_to_distinfo.get(plugin) - if distinfo: - plugin_info["version"] = distinfo.version - plugin_info["name"] = ( - getattr(distinfo, "name", None) or distinfo.project_name - ) - plugins[plugin_info["name"]] = plugin_info - return plugins - - - - -def get_plugin_hooks(plugin_pkg: str | None) -> Dict[str, Callable]: - """ - Get all the functions marked with @hookimpl on a module. - """ - if not plugin_pkg: - return {} - - hooks = {} - - plugin_module = importlib.import_module(plugin_pkg) - for attr_name in dir(plugin_module): - if attr_name.startswith('_'): - continue - try: - attr = getattr(plugin_module, attr_name) - if isinstance(attr, Callable): - hooks[attr_name] = None - pm.parse_hookimpl_opts(plugin_module, attr_name) - hooks[attr_name] = attr - except Exception as e: - print(f'Error getting hookimpls for {plugin_pkg}: {e}') - - return hooks diff --git a/archivebox/abx/archivebox/__init__.py b/archivebox/abx/archivebox/__init__.py deleted file mode 100644 index 58bbb447..00000000 --- a/archivebox/abx/archivebox/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -__package__ = 'abx.archivebox' - -import os -import importlib - -from typing import Dict -from pathlib import Path - - -def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]): - """Load archivebox plugins, very similar to abx.load_plugins but it looks for a pydantic PLUGIN model + hooks in apps.py""" - LOADED_PLUGINS = {} - for plugin_module, plugin_dir in reversed(plugins_dict.items()): - # print(f'Loading plugin: {plugin_module} from {plugin_dir}') - - # 1. register the plugin module directly in case it contains any look hookimpls (e.g. in __init__.py) - try: - plugin_module_loaded = importlib.import_module(plugin_module) - pm.register(plugin_module_loaded) - except Exception as e: - print(f'Error registering plugin: {plugin_module} - {e}') - - - # 2. 
then try to import plugin_module.apps as well - if os.access(plugin_dir / 'apps.py', os.R_OK): - plugin_apps = importlib.import_module(plugin_module + '.apps') - pm.register(plugin_apps) # register the whole .apps in case it contains loose hookimpls (not in a class) - - # print(f' √ Loaded plugin: {plugin_module} {len(archivebox_plugins_found) * "🧩"}') - return LOADED_PLUGINS diff --git a/archivebox/abx/archivebox/base_binary.py b/archivebox/abx/archivebox/base_binary.py deleted file mode 100644 index ee7ab5e1..00000000 --- a/archivebox/abx/archivebox/base_binary.py +++ /dev/null @@ -1,117 +0,0 @@ -__package__ = "abx.archivebox" - -import os -from typing import Optional, cast -from typing_extensions import Self - -from pydantic import validate_call -from pydantic_pkgr import ( - Binary, - BinProvider, - BinProviderName, - AptProvider, - BrewProvider, - EnvProvider, -) - -from archivebox.config.permissions import ARCHIVEBOX_USER - -import abx - - -class BaseBinProvider(BinProvider): - - # TODO: add install/load/load_or_install methods as abx.hookimpl methods - - @property - def admin_url(self) -> str: - # e.g. /admin/environment/binproviders/NpmBinProvider/ TODO - return "/admin/environment/binaries/" - - @abx.hookimpl - def get_BINPROVIDERS(self): - return [self] - -class BaseBinary(Binary): - # TODO: formalize state diagram, final states, transitions, side effects, etc. - - @staticmethod - def symlink_to_lib(binary, bin_dir=None) -> None: - from archivebox.config.common import STORAGE_CONFIG - bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin' - - if not (binary.abspath and os.access(binary.abspath, os.R_OK)): - return - - try: - bin_dir.mkdir(parents=True, exist_ok=True) - symlink = bin_dir / binary.name - symlink.unlink(missing_ok=True) - symlink.symlink_to(binary.abspath) - symlink.chmod(0o777) # make sure its executable by everyone - except Exception as err: - # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') - # not actually needed, we can just run without it - pass - - @validate_call - def load(self, fresh=False, **kwargs) -> Self: - from archivebox.config.common import STORAGE_CONFIG - if fresh: - binary = super().load(**kwargs) - self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin') - else: - # get cached binary from db - try: - from machine.models import InstalledBinary - installed_binary = InstalledBinary.objects.get_from_db_or_cache(self) # type: ignore - binary = InstalledBinary.load_from_db(installed_binary) - except Exception: - # maybe we are not in a DATA dir so there is no db, fallback to reading from fs - # (e.g. when archivebox version is run outside of a DATA dir) - binary = super().load(**kwargs) - return cast(Self, binary) - - @validate_call - def install(self, **kwargs) -> Self: - from archivebox.config.common import STORAGE_CONFIG - binary = super().install(**kwargs) - self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin') - return binary - - @validate_call - def load_or_install(self, fresh=False, **kwargs) -> Self: - from archivebox.config.common import STORAGE_CONFIG - try: - binary = self.load(fresh=fresh) - if binary and binary.version: - self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin') - return binary - except Exception: - pass - return self.install(**kwargs) - - @property - def admin_url(self) -> str: - # e.g. 
/admin/environment/config/LdapConfig/ - return f"/admin/environment/binaries/{self.name}/" - - @abx.hookimpl - def get_BINARIES(self): - return [self] - - -class AptBinProvider(AptProvider, BaseBinProvider): - name: BinProviderName = "apt" - -class BrewBinProvider(BrewProvider, BaseBinProvider): - name: BinProviderName = "brew" - -class EnvBinProvider(EnvProvider, BaseBinProvider): - name: BinProviderName = "env" - - euid: Optional[int] = ARCHIVEBOX_USER - -apt = AptBinProvider() -brew = BrewBinProvider() -env = EnvBinProvider() diff --git a/archivebox/abx/archivebox/base_extractor.py b/archivebox/abx/archivebox/base_extractor.py deleted file mode 100644 index 51dcc8d2..00000000 --- a/archivebox/abx/archivebox/base_extractor.py +++ /dev/null @@ -1,204 +0,0 @@ -__package__ = 'abx.archivebox' - -import json -import os - -from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple -from pathlib import Path - -from pydantic import AfterValidator -from pydantic_pkgr import BinName -from django.utils.functional import cached_property -from django.utils import timezone - -import abx - -from .base_binary import BaseBinary - - -def assert_no_empty_args(args: List[str]) -> List[str]: - assert all(len(arg) for arg in args) - return args - -ExtractorName = Annotated[str, AfterValidator(lambda s: s.isidentifier())] - -HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))] -CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)] - - -class BaseExtractor: - name: ExtractorName - binary: BinName - - default_args: CmdArgsList = [] - extra_args: CmdArgsList = [] - - def get_output_path(self, snapshot) -> Path: - return Path(self.__class__.__name__.lower()) - - def should_extract(self, uri: str, config: dict | None=None) -> bool: - try: - assert self.detect_installed_binary().version - except Exception: - raise - # could not load binary - return False - - # output_dir = self.get_output_path(snapshot) - # if output_dir.glob('*.*'): - # return False - return True - - @abx.hookimpl - def extract(self, snapshot_id: str) -> Dict[str, Any]: - from core.models import Snapshot - from archivebox import CONSTANTS - - snapshot = Snapshot.objects.get(id=snapshot_id) - - if not self.should_extract(snapshot.url): - return {} - - status = 'failed' - start_ts = timezone.now() - uplink = self.detect_network_interface() - installed_binary = self.detect_installed_binary() - machine = installed_binary.machine - assert uplink.machine == installed_binary.machine # it would be *very* weird if this wasn't true - - output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid) - output_dir.mkdir(parents=True, exist_ok=True) - - # execute the extractor binary with the given args - args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args] - cmd = [str(installed_binary.abspath), *args] - proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir) - - # collect the output - end_ts = timezone.now() - output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*')) - stdout = proc.stdout.strip() - stderr = proc.stderr.strip() - output_json = None - output_text = stdout - try: - output_json = json.loads(stdout.strip()) - output_text = None - except json.JSONDecodeError: - pass - - errors = [] - if proc.returncode == 0: - status = 'success' - else: - errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}') - 
- # increment health stats counters - if status == 'success': - machine.record_health_success() - uplink.record_health_success() - installed_binary.record_health_success() - else: - machine.record_health_failure() - uplink.record_health_failure() - installed_binary.record_health_failure() - - return { - 'extractor': self.name, - - 'snapshot': { - 'id': snapshot.id, - 'abid': snapshot.abid, - 'url': snapshot.url, - 'created_by_id': snapshot.created_by_id, - }, - - 'machine': { - 'id': machine.id, - 'abid': machine.abid, - 'guid': machine.guid, - 'hostname': machine.hostname, - 'hw_in_docker': machine.hw_in_docker, - 'hw_in_vm': machine.hw_in_vm, - 'hw_manufacturer': machine.hw_manufacturer, - 'hw_product': machine.hw_product, - 'hw_uuid': machine.hw_uuid, - 'os_arch': machine.os_arch, - 'os_family': machine.os_family, - 'os_platform': machine.os_platform, - 'os_release': machine.os_release, - 'os_kernel': machine.os_kernel, - }, - - 'uplink': { - 'id': uplink.id, - 'abid': uplink.abid, - 'mac_address': uplink.mac_address, - 'ip_public': uplink.ip_public, - 'ip_local': uplink.ip_local, - 'dns_server': uplink.dns_server, - 'hostname': uplink.hostname, - 'iface': uplink.iface, - 'isp': uplink.isp, - 'city': uplink.city, - 'region': uplink.region, - 'country': uplink.country, - }, - - 'binary': { - 'id': installed_binary.id, - 'abid': installed_binary.abid, - 'name': installed_binary.name, - 'binprovider': installed_binary.binprovider, - 'abspath': installed_binary.abspath, - 'version': installed_binary.version, - 'sha256': installed_binary.sha256, - }, - - 'cmd': cmd, - 'stdout': stdout, - 'stderr': stderr, - 'returncode': proc.returncode, - 'start_ts': start_ts, - 'end_ts': end_ts, - - 'status': status, - 'errors': errors, - 'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)), - 'output_files': output_files, - 'output_json': output_json or {}, - 'output_text': output_text or '', - } - - # TODO: move this to a hookimpl - def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None): - cwd = cwd or Path(os.getcwd()) - binary = self.load_binary(installed_binary=installed_binary) - - return binary.exec(cmd=args, cwd=cwd) - - @cached_property - def BINARY(self) -> BaseBinary: - import abx.archivebox.reads - for binary in abx.archivebox.reads.get_BINARIES().values(): - if binary.name == self.binary: - return binary - raise ValueError(f'Binary {self.binary} not found') - - def detect_installed_binary(self): - from machine.models import InstalledBinary - # hydrates binary from DB/cache if record of installed version is recent enough - # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host - return InstalledBinary.objects.get_from_db_or_cache(self.BINARY) - - def load_binary(self, installed_binary=None) -> BaseBinary: - installed_binary = installed_binary or self.detect_installed_binary() - return installed_binary.load_from_db() - - def detect_network_interface(self): - from machine.models import NetworkInterface - return NetworkInterface.objects.current() - - @abx.hookimpl - def get_EXTRACTORS(self): - return [self] diff --git a/archivebox/abx/archivebox/base_replayer.py b/archivebox/abx/archivebox/base_replayer.py deleted file mode 100644 index 097a9e94..00000000 --- a/archivebox/abx/archivebox/base_replayer.py +++ /dev/null @@ -1,25 +0,0 @@ -__package__ = 'abx.archivebox' - -import abx - - -class BaseReplayer: - """Describes how to render an ArchiveResult in several contexts""" - - url_pattern: str = '*' - - row_template: str = 
'plugins/generic_replayer/templates/row.html' - embed_template: str = 'plugins/generic_replayer/templates/embed.html' - fullpage_template: str = 'plugins/generic_replayer/templates/fullpage.html' - - # row_view: LazyImportStr = 'plugins.generic_replayer.views.row_view' - # embed_view: LazyImportStr = 'plugins.generic_replayer.views.embed_view' - # fullpage_view: LazyImportStr = 'plugins.generic_replayer.views.fullpage_view' - # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon' - # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon' - - @abx.hookimpl - def get_REPLAYERS(self): - return [self] - - # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc... diff --git a/archivebox/abx/archivebox/hookspec.py b/archivebox/abx/archivebox/hookspec.py deleted file mode 100644 index bfcb93b8..00000000 --- a/archivebox/abx/archivebox/hookspec.py +++ /dev/null @@ -1,52 +0,0 @@ -__package__ = 'abx.archivebox' - -from typing import Dict, Any - -from .. import hookspec - -from .base_binary import BaseBinary, BaseBinProvider -from .base_configset import BaseConfigSet -from .base_extractor import BaseExtractor -from .base_searchbackend import BaseSearchBackend - - -@hookspec -def get_PLUGIN() -> Dict[str, Dict[str, Any]]: - return {} - -@hookspec -def get_CONFIG() -> Dict[str, BaseConfigSet]: - return {} - - - -@hookspec -def get_EXTRACTORS() -> Dict[str, BaseExtractor]: - return {} - -@hookspec -def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]: - return {} - -# @hookspec -# def get_REPLAYERS() -> Dict[str, BaseReplayer]: -# return {} - -# @hookspec -# def get_ADMINDATAVIEWS(): -# return {} - -# @hookspec -# def get_QUEUES(): -# return {} - - -############################################################## -# provided by abx.pydantic_pkgr.hookspec: -# @hookspec -# def get_BINARIES() -> Dict[str, BaseBinary]: -# return {} - -# @hookspec -# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]: -# return {} diff --git a/archivebox/abx/archivebox/reads.py b/archivebox/abx/archivebox/reads.py deleted file mode 100644 index 10ad6ecd..00000000 --- a/archivebox/abx/archivebox/reads.py +++ /dev/null @@ -1,160 +0,0 @@ -__package__ = 'abx.archivebox' - -import importlib -from typing import Dict, Set, Any, TYPE_CHECKING - -from benedict import benedict - -import abx -from .. 
import pm - -if TYPE_CHECKING: - from .base_configset import BaseConfigSet - from .base_binary import BaseBinary, BaseBinProvider - from .base_extractor import BaseExtractor - from .base_searchbackend import BaseSearchBackend - # from .base_replayer import BaseReplayer - # from .base_queue import BaseQueue - # from .base_admindataview import BaseAdminDataView - -# API exposed to ArchiveBox code - -def get_PLUGINS() -> Dict[str, Dict[str, Any]]: - return benedict({ - plugin_id: plugin - for plugin_dict in pm.hook.get_PLUGIN() - for plugin_id, plugin in plugin_dict.items() - }) - -def get_PLUGIN(plugin_id: str) -> Dict[str, Any]: - plugin_info = get_PLUGINS().get(plugin_id, {}) - package = plugin_info.get('package', plugin_info.get('PACKAGE', None)) - if not package: - return {'id': plugin_id, 'hooks': {}} - module = importlib.import_module(package) - hooks = abx.get_plugin_hooks(module.__package__) - assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks) - - return benedict({ - 'id': plugin_id, - 'label': getattr(module, '__label__', plugin_id), - 'module': module, - 'package': module.__package__, - 'hooks': hooks, - 'version': getattr(module, '__version__', '999.999.999'), - 'author': getattr(module, '__author__', 'Unknown'), - 'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'), - 'dependencies': getattr(module, '__dependencies__', []), - 'source_code': module.__file__, - **plugin_info, - }) - - -def get_HOOKS() -> Set[str]: - return { - hook_name - for plugin_id in get_PLUGINS().keys() - for hook_name in get_PLUGIN(plugin_id).hooks - } - -def get_CONFIGS() -> benedict: # Dict[str, 'BaseConfigSet'] - return benedict({ - config_id: configset - for plugin_configs in pm.hook.get_CONFIG() - for config_id, configset in plugin_configs.items() - }) - - -def get_FLAT_CONFIG() -> Dict[str, Any]: - return benedict({ - key: value - for configset in get_CONFIGS().values() - for key, value in configset.model_dump().items() - }) - -def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']: - # TODO: move these to plugins - from abx.archivebox.base_binary import apt, brew, env - builtin_binproviders = { - 'env': env, - 'apt': apt, - 'brew': brew, - } - - return benedict({ - binprovider_id: binprovider - for plugin_binproviders in [builtin_binproviders, *pm.hook.get_BINPROVIDERS()] - for binprovider_id, binprovider in plugin_binproviders.items() - }) - -def get_BINARIES() -> Dict[str, 'BaseBinary']: - return benedict({ - binary_id: binary - for plugin_binaries in pm.hook.get_BINARIES() - for binary_id, binary in plugin_binaries.items() - }) - -def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']: - return benedict({ - extractor_id: extractor - for plugin_extractors in pm.hook.get_EXTRACTORS() - for extractor_id, extractor in plugin_extractors.items() - }) - -# def get_REPLAYERS() -> Dict[str, 'BaseReplayer']: -# return benedict({ -# replayer.id: replayer -# for plugin_replayers in pm.hook.get_REPLAYERS() -# for replayer in plugin_replayers -# }) - -# def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']: -# return benedict({ -# admin_dataview.id: admin_dataview -# for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS() -# for admin_dataview in plugin_admin_dataviews -# }) - -# def get_QUEUES() -> Dict[str, 'BaseQueue']: -# return benedict({ -# queue.id: queue -# for plugin_queues in pm.hook.get_QUEUES() -# for queue in plugin_queues -# }) - -def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']: - return benedict({ - searchbackend_id: 
searchbackend - for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS() - for searchbackend_id,searchbackend in plugin_searchbackends.items() - }) - - - -def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None): - """Get all the relevant config for the given scope, in correct precedence order""" - - from django.conf import settings - default_config: benedict = defaults or settings.CONFIG - - snapshot = snapshot or (archiveresult and archiveresult.snapshot) - crawl = crawl or (snapshot and snapshot.crawl) - seed = seed or (crawl and crawl.seed) - persona = persona or (crawl and crawl.persona) - - persona_config = persona.config if persona else {} - seed_config = seed.config if seed else {} - crawl_config = crawl.config if crawl else {} - snapshot_config = snapshot.config if snapshot else {} - archiveresult_config = archiveresult.config if archiveresult else {} - extra_config = extra_config or {} - - return { - **default_config, # defaults / config file / environment variables - **persona_config, # lowest precedence - **seed_config, - **crawl_config, - **snapshot_config, - **archiveresult_config, - **extra_config, # highest precedence - } diff --git a/archivebox/abx/django/__init__.py b/archivebox/abx/django/__init__.py deleted file mode 100644 index 56fe8ddd..00000000 --- a/archivebox/abx/django/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__package__ = 'abx.django' diff --git a/archivebox/abx/django/use.py b/archivebox/abx/django/use.py deleted file mode 100644 index a52ada3b..00000000 --- a/archivebox/abx/django/use.py +++ /dev/null @@ -1,101 +0,0 @@ -__package__ = 'abx.django' - -import itertools -# from benedict import benedict - -from .. import pm - - -def get_INSTALLED_APPS(): - return itertools.chain(*reversed(pm.hook.get_INSTALLED_APPS())) - -# def register_INSTALLLED_APPS(INSTALLED_APPS): -# pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS) - - -def get_MIDDLEWARES(): - return itertools.chain(*reversed(pm.hook.get_MIDDLEWARE())) - -# def register_MIDDLEWARES(MIDDLEWARE): -# pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE) - - -def get_AUTHENTICATION_BACKENDS(): - return itertools.chain(*reversed(pm.hook.get_AUTHENTICATION_BACKENDS())) - -# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): -# pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS) - - -def get_STATICFILES_DIRS(): - return itertools.chain(*reversed(pm.hook.get_STATICFILES_DIRS())) - -# def register_STATICFILES_DIRS(STATICFILES_DIRS): -# pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS) - - -def get_TEMPLATE_DIRS(): - return itertools.chain(*reversed(pm.hook.get_TEMPLATE_DIRS())) - -# def register_TEMPLATE_DIRS(TEMPLATE_DIRS): -# pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS) - -def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'): - HUEY_QUEUES = {} - for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME): - HUEY_QUEUES.update(plugin_result) - return HUEY_QUEUES - -# def register_DJANGO_HUEY(DJANGO_HUEY): -# pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY) - -def get_ADMIN_DATA_VIEWS_URLS(): - return itertools.chain(*reversed(pm.hook.get_ADMIN_DATA_VIEWS_URLS())) - -# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): -# pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS) - - -# def register_settings(settings): -# # convert settings dict to an benedict so we can set 
values using settings.attr = xyz notation -# settings_as_obj = benedict(settings, keypath_separator=None) - -# # set default values for settings that are used by plugins -# # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', []) -# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', []) -# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', []) -# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', []) -# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', []) -# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}}) -# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []}) - -# # # call all the hook functions to mutate the settings values in-place -# # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS) -# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE) -# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS) -# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS) -# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS) -# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY) -# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS) - -# # calls Plugin.settings(settings) on each registered plugin -# pm.hook.register_settings(settings=settings_as_obj) - -# # then finally update the settings globals() object will all the new settings -# # settings.update(settings_as_obj) - - -def get_urlpatterns(): - return list(itertools.chain(*pm.hook.urlpatterns())) - -def register_urlpatterns(urlpatterns): - pm.hook.register_urlpatterns(urlpatterns=urlpatterns) - - -def register_checks(): - """register any django system checks""" - pm.hook.register_checks() - -def register_admin(admin_site): - """register any django admin models/views with the main django admin site instance""" - pm.hook.register_admin(admin_site=admin_site) diff --git a/archivebox/abx/hookspec.py b/archivebox/abx/hookspec.py deleted file mode 100644 index a25f7673..00000000 --- a/archivebox/abx/hookspec.py +++ /dev/null @@ -1,22 +0,0 @@ -from pathlib import Path - -from pluggy import HookimplMarker -from pluggy import HookspecMarker - -spec = hookspec = HookspecMarker("abx") -impl = hookimpl = HookimplMarker("abx") - - -@hookspec -@hookimpl -def get_system_user() -> str: - # Beware $HOME may not match current EUID, UID, PUID, SUID, there are edge cases - # - sudo (EUD != UID != SUID) - # - running with an autodetected UID based on data dir ownership - # but mapping of UID:username is broken because it was created - # by a different host system, e.g. 911's $HOME outside of docker - # might be /usr/lib/lxd instead of /home/archivebox - # - running as a user that doens't have a home directory - # - home directory is set to a path that doesn't exist, or is inside a dir we cant read - return Path('~').expanduser().name - diff --git a/archivebox/abx/manager.py b/archivebox/abx/manager.py deleted file mode 100644 index 8d44a087..00000000 --- a/archivebox/abx/manager.py +++ /dev/null @@ -1,30 +0,0 @@ -import inspect - -import pluggy - - -class PluginManager(pluggy.PluginManager): - """ - Patch to fix pluggy's PluginManager to work with pydantic models. 
-    See: https://github.com/pytest-dev/pluggy/pull/536
-    """
-    def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None:
-        # IMPORTANT: @property methods can have side effects, and are never hookimpl
-        # if attr is a property, skip it in advance
-        plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
-        if isinstance(getattr(plugin_class, name, None), property):
-            return None
-
-        # pydantic model fields are like attrs and also can never be hookimpls
-        plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__")
-        if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}):
-            # pydantic models mess with the class and attr __signature__
-            # so inspect.isroutine(...) throws exceptions and cant be used
-            return None
-
-        try:
-            return super().parse_hookimpl_opts(plugin, name)
-        except AttributeError:
-            return super().parse_hookimpl_opts(type(plugin), name)
-
-pm = PluginManager("abx")
diff --git a/archivebox/abx/pydantic_pkgr/__init__.py b/archivebox/abx/pydantic_pkgr/__init__.py
deleted file mode 100644
index 28cd0f81..00000000
--- a/archivebox/abx/pydantic_pkgr/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__package__ = 'abx.pydantic_pkgr'
diff --git a/archivebox/abx/pydantic_pkgr/hookspec.py b/archivebox/abx/pydantic_pkgr/hookspec.py
deleted file mode 100644
index 6b293abb..00000000
--- a/archivebox/abx/pydantic_pkgr/hookspec.py
+++ /dev/null
@@ -1,13 +0,0 @@
-
-from ..hookspec import hookspec
-
-###########################################################################################
-
-@hookspec
-def get_BINPROVIDERS():
-    return {}
-
-@hookspec
-def get_BINARIES():
-    return {}
-
diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index 2b9e7edb..88858156 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -9,9 +9,6 @@ from pathlib import Path
 from django.utils.crypto import get_random_string
 
 import abx
-import abx.archivebox
-import abx.archivebox.reads
-import abx.django.use
 
 from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
 from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG  # noqa
@@ -26,43 +23,22 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v
 ################################################################################
 
 PLUGIN_HOOKSPECS = [
-    'abx.django.hookspec',
-    'abx.pydantic_pkgr.hookspec',
-    'abx.archivebox.hookspec',
+    'abx_spec_django',
+    'abx_spec_pydantic_pkgr',
+    'abx_spec_config',
+    'abx_spec_archivebox',
 ]
 abx.register_hookspecs(PLUGIN_HOOKSPECS)
 
-BUILTIN_PLUGIN_DIRS = {
-    'archivebox': PACKAGE_DIR,
-    'plugins_pkg': PACKAGE_DIR / 'plugins_pkg',
-    'plugins_auth': PACKAGE_DIR / 'plugins_auth',
-    'plugins_search': PACKAGE_DIR / 'plugins_search',
-    'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
-}
-USER_PLUGIN_DIRS = {
-    # 'user_plugins': DATA_DIR / 'user_plugins',
-}
+SYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
+USER_PLUGINS = abx.find_plugins_in_dir(DATA_DIR / 'user_plugins')
 
-# Discover ArchiveBox plugins
-BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
-PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox')
-USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
-ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
+ALL_PLUGINS = {**SYSTEM_PLUGINS, **USER_PLUGINS}
 
 # Load ArchiveBox plugins
-PLUGIN_MANAGER = abx.pm
-abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
-PLUGINS = abx.archivebox.reads.get_PLUGINS()
+abx.load_plugins(ALL_PLUGINS)
 
-# Load ArchiveBox config from plugins
-CONFIGS = abx.archivebox.reads.get_CONFIGS()
-CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
-BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
-BINARIES = abx.archivebox.reads.get_BINARIES()
-EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
-SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
-# REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
-# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
+# # Load ArchiveBox config from plugins
 
 ################################################################################
@@ -110,7 +86,7 @@ INSTALLED_APPS = [
     'api',                 # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
 
     # ArchiveBox plugins
-    *abx.django.use.get_INSTALLED_APPS(),   # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
+    *abx.as_list(abx.pm.hook.get_INSTALLED_APPS()),   # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
 
     # 3rd-party apps from PyPI that need to be loaded last
     'admin_data_views',    # handles rendering some convenient automatic read-only views of data in Django admin
@@ -135,7 +111,7 @@ MIDDLEWARE = [
     'core.middleware.ReverseProxyAuthMiddleware',
     'django.contrib.messages.middleware.MessageMiddleware',
     'core.middleware.CacheControlMiddleware',
-    *abx.django.use.get_MIDDLEWARES(),
+    *abx.as_list(abx.pm.hook.get_MIDDLEWARES()),
 ]
 
@@ -148,7 +124,7 @@ MIDDLEWARE = [
 
 AUTHENTICATION_BACKENDS = [
     'django.contrib.auth.backends.RemoteUserBackend',
     'django.contrib.auth.backends.ModelBackend',
-    *abx.django.use.get_AUTHENTICATION_BACKENDS(),
+    *abx.as_list(abx.pm.hook.get_AUTHENTICATION_BACKENDS()),
 ]
@@ -169,7 +145,7 @@ AUTHENTICATION_BACKENDS = [
 STATIC_URL = '/static/'
 TEMPLATES_DIR_NAME = 'templates'
 
-CUSTOM_TEMPLATES_ENABLED = os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK) and CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir()
+CUSTOM_TEMPLATES_ENABLED = os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK)
 STATICFILES_DIRS = [
     *([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_ENABLED else []),
     # *[
@@ -177,7 +153,7 @@ STATICFILES_DIRS = [
     #     str(plugin_dir / 'static')
     #     for plugin_dir in PLUGIN_DIRS.values()
     #     if (plugin_dir / 'static').is_dir()
     # ],
-    *abx.django.use.get_STATICFILES_DIRS(),
+    *abx.as_list(abx.pm.hook.get_STATICFILES_DIRS()),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
 ]
@@ -188,7 +164,7 @@ TEMPLATE_DIRS = [
     #     for plugin_dir in PLUGIN_DIRS.values()
     #     if (plugin_dir / 'templates').is_dir()
     # ],
-    *abx.django.use.get_TEMPLATE_DIRS(),
+    *abx.as_list(abx.pm.hook.get_TEMPLATE_DIRS()),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@@ -292,7 +268,7 @@ if not IS_GETTING_VERSION_OR_HELP:
         # dont create queue.sqlite3 file
         "queues": {
             HUEY["name"]: HUEY.copy(),   # more registered here at plugin import-time by BaseQueue.register()
-            **abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME),
+            **abx.as_dict(abx.pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME)),
         },
     }
@@ -517,7 +493,7 @@ ADMIN_DATA_VIEWS = {
             "name": "log",
         },
     },
-    *abx.django.use.get_ADMIN_DATA_VIEWS_URLS(),
+    *abx.as_list(abx.pm.hook.get_ADMIN_DATA_VIEWS_URLS()),
     ],
 }
@@ -611,7 +587,4 @@ if DEBUG_REQUESTS_TRACKER:
 
 # JET_TOKEN = 'some-api-token-here'
 
-abx.django.use.register_checks()
-# abx.archivebox.reads.register_all_hooks(globals())
-
 # import ipdb; ipdb.set_trace()
diff --git a/archivebox/plugins_pkg/npm/binproviders.py b/archivebox/plugins_pkg/npm/binproviders.py
deleted file mode 100644
index b1b83168..00000000
--- a/archivebox/plugins_pkg/npm/binproviders.py
+++ /dev/null
@@ -1,42 +0,0 @@
-__package__ = 'plugins_pkg.npm'
-
-from pathlib import Path
-from typing import Optional
-
-from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
-
-from archivebox.config import DATA_DIR, CONSTANTS
-
-from abx.archivebox.base_binary import BaseBinProvider
-
-
-
-OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
-NEW_NODE_BIN_PATH = CONSTANTS.DEFAULT_LIB_DIR / 'npm' / 'node_modules' / '.bin'
-
-
-class SystemNpmBinProvider(NpmProvider, BaseBinProvider):
-    name: BinProviderName = "sys_npm"
-
-    npm_prefix: Optional[Path] = None
-
-
-class LibNpmBinProvider(NpmProvider, BaseBinProvider):
-    name: BinProviderName = "lib_npm"
-    PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
-
-    npm_prefix: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'npm'
-
-    def setup(self) -> None:
-        # update paths from config if they arent the default
-        from archivebox.config.common import STORAGE_CONFIG
-        if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
-            self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm'
-            self.PATH = f'{STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
-
-        super().setup()
-
-
-SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
-LIB_NPM_BINPROVIDER = LibNpmBinProvider()
-npm = LIB_NPM_BINPROVIDER
diff --git a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py
index a997acbb..fcd93405 100644
--- a/archivebox/vendor/__init__.py
+++ b/archivebox/vendor/__init__.py
@@ -8,8 +8,8 @@ VENDORED_LIBS = {
     # sys.path dir:  library name
     #'python-atomicwrites': 'atomicwrites',
     #'django-taggit': 'taggit',
-    'pydantic-pkgr': 'pydantic_pkgr',
-    'pocket': 'pocket',
+    # 'pydantic-pkgr': 'pydantic_pkgr',
+    # 'pocket': 'pocket',
    #'base32-crockford': 'base32_crockford',
 }
diff --git a/archivebox/vendor/pocket b/archivebox/vendor/pocket
deleted file mode 160000
index e7970b63..00000000
--- a/archivebox/vendor/pocket
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5
diff --git a/archivebox/vendor/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr
deleted file mode 160000
index a774f246..00000000
--- a/archivebox/vendor/pydantic-pkgr
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit a774f24644ee14f14fa2cc3d8e6e0a585ae00fdd
diff --git a/click_test.py b/click_test.py
new file mode 100644
index 00000000..52d1d6e1
--- /dev/null
+++ b/click_test.py
@@ -0,0 +1,32 @@
+import sys
+import click
+from rich import print
+from archivebox.config.django import setup_django
+
+setup_django()
+
+import abx.archivebox.writes
+
+
+def parse_stdin_to_args(io=sys.stdin):
+    for line in io.read().split('\n'):
+        for url_or_id in line.split(' '):
+            if url_or_id.strip():
+                yield url_or_id.strip()
+
+
+# Gather data from stdin in case using a pipe
+if not sys.stdin.isatty():
+    sys.argv += parse_stdin_to_args(sys.stdin)
+
+
+@click.command()
+@click.argument("snapshot_ids_or_urls", type=str, nargs=-1)
+def extract(snapshot_ids_or_urls):
+    for url_or_snapshot_id in snapshot_ids_or_urls:
+        print('- EXTRACTING', url_or_snapshot_id, file=sys.stderr)
+        for result in abx.archivebox.writes.extract(url_or_snapshot_id):
+            print(result)
+
+if __name__ == "__main__":
+    extract()
diff --git a/archivebox/plugins_auth/__init__.py b/packages/abx-plugin-archivedotorg-extractor/README.md
similarity
index 100% rename from archivebox/plugins_auth/__init__.py rename to packages/abx-plugin-archivedotorg-extractor/README.md diff --git a/archivebox/plugins_extractor/archivedotorg/__init__.py b/packages/abx-plugin-archivedotorg-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/archivedotorg/__init__.py rename to packages/abx-plugin-archivedotorg-extractor/__init__.py diff --git a/archivebox/plugins_extractor/archivedotorg/config.py b/packages/abx-plugin-archivedotorg-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/archivedotorg/config.py rename to packages/abx-plugin-archivedotorg-extractor/config.py diff --git a/packages/abx-plugin-archivedotorg-extractor/pyproject.toml b/packages/abx-plugin-archivedotorg-extractor/pyproject.toml new file mode 100644 index 00000000..8754b4bd --- /dev/null +++ b/packages/abx-plugin-archivedotorg-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-archivedotorg-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/archivebox/plugins_extractor/__init__.py b/packages/abx-plugin-chrome-extractor/README.md similarity index 100% rename from archivebox/plugins_extractor/__init__.py rename to packages/abx-plugin-chrome-extractor/README.md diff --git a/archivebox/plugins_extractor/chrome/__init__.py b/packages/abx-plugin-chrome-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/chrome/__init__.py rename to packages/abx-plugin-chrome-extractor/__init__.py diff --git a/archivebox/plugins_extractor/chrome/binaries.py b/packages/abx-plugin-chrome-extractor/binaries.py similarity index 84% rename from archivebox/plugins_extractor/chrome/binaries.py rename to packages/abx-plugin-chrome-extractor/binaries.py index 59573d93..a79b66a2 100644 --- a/archivebox/plugins_extractor/chrome/binaries.py +++ b/packages/abx-plugin-chrome-extractor/binaries.py @@ -13,15 +13,15 @@ from pydantic_pkgr import ( bin_abspath, ) +import abx.archivebox.reads from abx.archivebox.base_binary import BaseBinary, env, apt, brew -# Depends on Other Plugins: -from archivebox.config.common import SHELL_CONFIG -from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER -from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER +from abx_puppeteer_binprovider.binproviders import PUPPETEER_BINPROVIDER +from abx_playwright_binprovider.binproviders import PLAYWRIGHT_BINPROVIDER from .config import CHROME_CONFIG + CHROMIUM_BINARY_NAMES_LINUX = [ "chromium", "chromium-browser", @@ -48,12 +48,13 @@ CHROME_BINARY_NAMES_MACOS = [ ] CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS -APT_DEPENDENCIES = [ - 'apt-transport-https', 'at-spi2-common', 'chromium-browser', +CHROME_APT_DEPENDENCIES = [ + 'apt-transport-https', 'at-spi2-common', 'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei', 'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2', 'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1', 'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 
'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings', + 'chromium-browser', ] @@ -95,7 +96,7 @@ class ChromeBinary(BaseBinary): 'packages': ['chromium'], # playwright install chromium }, apt.name: { - 'packages': APT_DEPENDENCIES, + 'packages': CHROME_APT_DEPENDENCIES, }, brew.name: { 'packages': ['--cask', 'chromium'] if platform.system().lower() == 'darwin' else [], @@ -104,10 +105,9 @@ class ChromeBinary(BaseBinary): @staticmethod def symlink_to_lib(binary, bin_dir=None) -> None: - from archivebox.config.common import STORAGE_CONFIG - bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin' + bin_dir = bin_dir or abx.archivebox.reads.get_CONFIGS().STORAGE_CONFIG.LIB_DIR / 'bin' - if not (binary.abspath and os.access(binary.abspath, os.F_OK)): + if not (binary.abspath and os.path.isfile(binary.abspath)): return bin_dir.mkdir(parents=True, exist_ok=True) @@ -121,7 +121,7 @@ class ChromeBinary(BaseBinary): # otherwise on linux we can symlink directly to binary executable symlink.unlink(missing_ok=True) symlink.symlink_to(binary.abspath) - except Exception as err: + except Exception: # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') # not actually needed, we can just run without it pass @@ -132,14 +132,17 @@ class ChromeBinary(BaseBinary): Cleans up any state or runtime files that chrome leaves behind when killed by a timeout or other error """ - lock_file = Path("~/.config/chromium/SingletonLock").expanduser() - - if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK): - lock_file.unlink() + try: + linux_lock_file = Path("~/.config/chromium/SingletonLock").expanduser() + linux_lock_file.unlink(missing_ok=True) + except Exception: + pass if CHROME_CONFIG.CHROME_USER_DATA_DIR: - if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK): - lock_file.unlink() + try: + (CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock').unlink(missing_ok=True) + except Exception: + pass diff --git a/archivebox/plugins_extractor/chrome/config.py b/packages/abx-plugin-chrome-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/chrome/config.py rename to packages/abx-plugin-chrome-extractor/config.py diff --git a/packages/abx-plugin-chrome-extractor/pyproject.toml b/packages/abx-plugin-chrome-extractor/pyproject.toml new file mode 100644 index 00000000..6676882c --- /dev/null +++ b/packages/abx-plugin-chrome-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-chrome-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/archivebox/plugins_pkg/__init__.py b/packages/abx-plugin-curl-extractor/README.md similarity index 100% rename from archivebox/plugins_pkg/__init__.py rename to packages/abx-plugin-curl-extractor/README.md diff --git a/archivebox/plugins_extractor/curl/__init__.py b/packages/abx-plugin-curl-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/curl/__init__.py rename to packages/abx-plugin-curl-extractor/__init__.py diff --git a/archivebox/plugins_extractor/curl/binaries.py b/packages/abx-plugin-curl-extractor/binaries.py similarity index 100% rename from archivebox/plugins_extractor/curl/binaries.py rename to packages/abx-plugin-curl-extractor/binaries.py diff --git a/archivebox/plugins_extractor/curl/config.py b/packages/abx-plugin-curl-extractor/config.py similarity index 100% 
rename from archivebox/plugins_extractor/curl/config.py rename to packages/abx-plugin-curl-extractor/config.py diff --git a/packages/abx-plugin-curl-extractor/pyproject.toml b/packages/abx-plugin-curl-extractor/pyproject.toml new file mode 100644 index 00000000..9bd6f396 --- /dev/null +++ b/packages/abx-plugin-curl-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-curl-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/archivebox/plugins_search/__init__.py b/packages/abx-plugin-default-binproviders/README.md similarity index 100% rename from archivebox/plugins_search/__init__.py rename to packages/abx-plugin-default-binproviders/README.md diff --git a/packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py b/packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py new file mode 100644 index 00000000..2a628a4e --- /dev/null +++ b/packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py @@ -0,0 +1,24 @@ + +import abx + +from typing import Dict + +from pydantic_pkgr import ( + AptProvider, + BrewProvider, + EnvProvider, + BinProvider, +) +apt = APT_BINPROVIDER = AptProvider() +brew = BREW_BINPROVIDER = BrewProvider() +env = ENV_BINPROVIDER = EnvProvider() + + +@abx.hookimpl(tryfirst=True) +def get_BINPROVIDERS() -> Dict[str, BinProvider]: + + return { + 'apt': APT_BINPROVIDER, + 'brew': BREW_BINPROVIDER, + 'env': ENV_BINPROVIDER, + } diff --git a/packages/abx-plugin-default-binproviders/pyproject.toml b/packages/abx-plugin-default-binproviders/pyproject.toml new file mode 100644 index 00000000..3f8fec96 --- /dev/null +++ b/packages/abx-plugin-default-binproviders/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-default-binproviders" +version = "2024.10.24" +description = "Default BinProviders for ABX (apt, brew, env)" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic-pkgr>=0.5.4", + "abx-spec-pydantic-pkgr>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_default_binproviders = "abx_plugin_default_binproviders" diff --git a/packages/abx-plugin-favicon-extractor/README.md b/packages/abx-plugin-favicon-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/favicon/__init__.py b/packages/abx-plugin-favicon-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/favicon/__init__.py rename to packages/abx-plugin-favicon-extractor/__init__.py diff --git a/archivebox/plugins_extractor/favicon/config.py b/packages/abx-plugin-favicon-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/favicon/config.py rename to packages/abx-plugin-favicon-extractor/config.py diff --git a/packages/abx-plugin-favicon-extractor/pyproject.toml b/packages/abx-plugin-favicon-extractor/pyproject.toml new file mode 100644 index 00000000..96e62f6d --- /dev/null +++ b/packages/abx-plugin-favicon-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-favicon-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-git-extractor/README.md b/packages/abx-plugin-git-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git 
a/archivebox/plugins_extractor/git/__init__.py b/packages/abx-plugin-git-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/git/__init__.py rename to packages/abx-plugin-git-extractor/__init__.py diff --git a/archivebox/plugins_extractor/git/binaries.py b/packages/abx-plugin-git-extractor/binaries.py similarity index 100% rename from archivebox/plugins_extractor/git/binaries.py rename to packages/abx-plugin-git-extractor/binaries.py diff --git a/archivebox/plugins_extractor/git/config.py b/packages/abx-plugin-git-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/git/config.py rename to packages/abx-plugin-git-extractor/config.py diff --git a/archivebox/plugins_extractor/git/extractors.py b/packages/abx-plugin-git-extractor/extractors.py similarity index 100% rename from archivebox/plugins_extractor/git/extractors.py rename to packages/abx-plugin-git-extractor/extractors.py diff --git a/packages/abx-plugin-git-extractor/pyproject.toml b/packages/abx-plugin-git-extractor/pyproject.toml new file mode 100644 index 00000000..4a7b375e --- /dev/null +++ b/packages/abx-plugin-git-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-git-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-htmltotext-extractor/README.md b/packages/abx-plugin-htmltotext-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/htmltotext/__init__.py b/packages/abx-plugin-htmltotext-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/htmltotext/__init__.py rename to packages/abx-plugin-htmltotext-extractor/__init__.py diff --git a/archivebox/plugins_extractor/htmltotext/config.py b/packages/abx-plugin-htmltotext-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/htmltotext/config.py rename to packages/abx-plugin-htmltotext-extractor/config.py diff --git a/packages/abx-plugin-htmltotext-extractor/pyproject.toml b/packages/abx-plugin-htmltotext-extractor/pyproject.toml new file mode 100644 index 00000000..2e26cb25 --- /dev/null +++ b/packages/abx-plugin-htmltotext-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-htmltotext-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-ldap-auth/README.md b/packages/abx-plugin-ldap-auth/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_auth/ldap/__init__.py b/packages/abx-plugin-ldap-auth/__init__.py similarity index 100% rename from archivebox/plugins_auth/ldap/__init__.py rename to packages/abx-plugin-ldap-auth/__init__.py diff --git a/archivebox/plugins_auth/ldap/binaries.py b/packages/abx-plugin-ldap-auth/binaries.py similarity index 100% rename from archivebox/plugins_auth/ldap/binaries.py rename to packages/abx-plugin-ldap-auth/binaries.py diff --git a/archivebox/plugins_auth/ldap/config.py b/packages/abx-plugin-ldap-auth/config.py similarity index 100% rename from archivebox/plugins_auth/ldap/config.py rename to packages/abx-plugin-ldap-auth/config.py diff --git a/packages/abx-plugin-ldap-auth/pyproject.toml b/packages/abx-plugin-ldap-auth/pyproject.toml new file mode 100644 index 00000000..1db98ebd --- /dev/null +++ b/packages/abx-plugin-ldap-auth/pyproject.toml @@ -0,0 +1,22 @@ +[project] 
+name = "abx-ldap-auth" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] + + +[project.entry-points.abx] +ldap = "abx_ldap_auth" + + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.sdist] +packages = ["."] + +[tool.hatch.build.targets.wheel] +packages = ["."] diff --git a/packages/abx-plugin-mercury-extractor/README.md b/packages/abx-plugin-mercury-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/mercury/__init__.py b/packages/abx-plugin-mercury-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/mercury/__init__.py rename to packages/abx-plugin-mercury-extractor/__init__.py diff --git a/archivebox/plugins_extractor/mercury/binaries.py b/packages/abx-plugin-mercury-extractor/binaries.py similarity index 100% rename from archivebox/plugins_extractor/mercury/binaries.py rename to packages/abx-plugin-mercury-extractor/binaries.py diff --git a/archivebox/plugins_extractor/mercury/config.py b/packages/abx-plugin-mercury-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/mercury/config.py rename to packages/abx-plugin-mercury-extractor/config.py diff --git a/archivebox/plugins_extractor/mercury/extractors.py b/packages/abx-plugin-mercury-extractor/extractors.py similarity index 100% rename from archivebox/plugins_extractor/mercury/extractors.py rename to packages/abx-plugin-mercury-extractor/extractors.py diff --git a/packages/abx-plugin-mercury-extractor/pyproject.toml b/packages/abx-plugin-mercury-extractor/pyproject.toml new file mode 100644 index 00000000..35415a1d --- /dev/null +++ b/packages/abx-plugin-mercury-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-mercury-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-npm-binprovider/README.md b/packages/abx-plugin-npm-binprovider/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_pkg/npm/__init__.py b/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py similarity index 63% rename from archivebox/plugins_pkg/npm/__init__.py rename to packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py index 921d42e4..3901516e 100644 --- a/archivebox/plugins_pkg/npm/__init__.py +++ b/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py @@ -1,26 +1,12 @@ -__package__ = 'plugins_pkg.npm' -__version__ = '2024.10.14' +__package__ = 'abx_plugin_npm_binprovider' __id__ = 'npm' -__label__ = 'npm' +__label__ = 'NPM' __author__ = 'ArchiveBox' __homepage__ = 'https://www.npmjs.com/' import abx -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import NPM_CONFIG diff --git a/archivebox/plugins_pkg/npm/binaries.py b/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binaries.py similarity index 72% rename from archivebox/plugins_pkg/npm/binaries.py rename to packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binaries.py index dd9e6214..4f44fc4a 100644 --- a/archivebox/plugins_pkg/npm/binaries.py +++ 
b/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binaries.py @@ -4,14 +4,19 @@ __package__ = 'plugins_pkg.npm' from typing import List from pydantic import InstanceOf +from benedict import benedict -from pydantic_pkgr import BinProvider, BinName, BinaryOverrides +from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides + +from abx_plugin_default_binproviders import get_BINPROVIDERS + +DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS()) +env = DEFAULT_BINPROVIDERS.env +apt = DEFAULT_BINPROVIDERS.apt +brew = DEFAULT_BINPROVIDERS.brew -from abx.archivebox.base_binary import BaseBinary, env, apt, brew - - -class NodeBinary(BaseBinary): +class NodeBinary(Binary): name: BinName = 'node' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] @@ -23,7 +28,7 @@ class NodeBinary(BaseBinary): NODE_BINARY = NodeBinary() -class NpmBinary(BaseBinary): +class NpmBinary(Binary): name: BinName = 'npm' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] @@ -35,7 +40,7 @@ class NpmBinary(BaseBinary): NPM_BINARY = NpmBinary() -class NpxBinary(BaseBinary): +class NpxBinary(Binary): name: BinName = 'npx' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py b/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py new file mode 100644 index 00000000..e0b26a90 --- /dev/null +++ b/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py @@ -0,0 +1,39 @@ +import os +from pathlib import Path +from typing import Optional + +from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName + +import abx + +DEFAULT_LIB_NPM_DIR = Path('/usr/local/share/abx/npm') + +OLD_NODE_BIN_PATH = Path(os.getcwd()) / 'node_modules' / '.bin' +NEW_NODE_BIN_PATH = DEFAULT_LIB_NPM_DIR / 'node_modules' / '.bin' + + +class SystemNpmBinProvider(NpmProvider): + name: BinProviderName = "sys_npm" + + npm_prefix: Optional[Path] = None + + +class LibNpmBinProvider(NpmProvider): + name: BinProviderName = "lib_npm" + PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}' + + npm_prefix: Optional[Path] = DEFAULT_LIB_NPM_DIR + + def setup(self) -> None: + # update paths from config at runtime + LIB_DIR = abx.pm.hook.get_CONFIG().LIB_DIR + + self.npm_prefix = LIB_DIR / 'npm' + self.PATH = f'{LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}' + + super().setup() + + +SYS_NPM_BINPROVIDER = SystemNpmBinProvider() +LIB_NPM_BINPROVIDER = LibNpmBinProvider() +npm = LIB_NPM_BINPROVIDER diff --git a/archivebox/plugins_pkg/npm/config.py b/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/config.py similarity index 79% rename from archivebox/plugins_pkg/npm/config.py rename to packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/config.py index f69cfdd2..b937ed27 100644 --- a/archivebox/plugins_pkg/npm/config.py +++ b/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/config.py @@ -1,7 +1,4 @@ -__package__ = 'plugins_pkg.npm' - - -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config import BaseConfigSet ###################### Config ########################## diff --git a/packages/abx-plugin-npm-binprovider/pyproject.toml b/packages/abx-plugin-npm-binprovider/pyproject.toml new file mode 100644 index 00000000..5d614f90 --- /dev/null +++ b/packages/abx-plugin-npm-binprovider/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = 
"abx-plugin-npm-binprovider" +version = "2024.10.24" +description = "NPM binary provider plugin for ABX" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic-pkgr>=0.5.4", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-plugin-default-binproviders>=2024.10.24", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_npm_binprovider = "abx_plugin_npm_binprovider" diff --git a/packages/abx-plugin-pip-binprovider/README.md b/packages/abx-plugin-pip-binprovider/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_pkg/pip/.plugin_order b/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/.plugin_order similarity index 100% rename from archivebox/plugins_pkg/pip/.plugin_order rename to packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/.plugin_order diff --git a/archivebox/plugins_pkg/pip/__init__.py b/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py similarity index 62% rename from archivebox/plugins_pkg/pip/__init__.py rename to packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py index c1be27b1..8445055f 100644 --- a/archivebox/plugins_pkg/pip/__init__.py +++ b/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py @@ -1,33 +1,19 @@ -__package__ = 'plugins_pkg.pip' -__label__ = 'pip' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/pypa/pip' +__package__ = 'abx_plugin_pip_binprovider' +__id__ = 'pip' +__label__ = 'PIP' import abx -@abx.hookimpl -def get_PLUGIN(): - return { - 'pip': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import PIP_CONFIG return { - 'pip': PIP_CONFIG + __id__: PIP_CONFIG } -@abx.hookimpl +@abx.hookimpl(tryfirst=True) def get_BINARIES(): from .binaries import ARCHIVEBOX_BINARY, PYTHON_BINARY, DJANGO_BINARY, SQLITE_BINARY, PIP_BINARY, PIPX_BINARY diff --git a/archivebox/plugins_pkg/pip/binaries.py b/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py similarity index 84% rename from archivebox/plugins_pkg/pip/binaries.py rename to packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py index 3e451cfe..b1974250 100644 --- a/archivebox/plugins_pkg/pip/binaries.py +++ b/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py @@ -1,4 +1,4 @@ -__package__ = 'plugins_pkg.pip' +__package__ = 'abx_plugin_pip_binprovider' import sys from pathlib import Path @@ -9,29 +9,30 @@ from pydantic import InstanceOf, Field, model_validator import django import django.db.backends.sqlite3.base from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type] -from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, SemVer +from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer -from archivebox import VERSION -from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew - -from archivebox.misc.logging import hint - -from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER +from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew ###################### Config ########################## +def get_archivebox_version(): + try: + from 
archivebox import VERSION + return VERSION + except Exception: + return None -class ArchiveboxBinary(BaseBinary): +class ArchiveboxBinary(Binary): name: BinName = 'archivebox' binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] overrides: BinaryOverrides = { - VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION}, - SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION}, - apt.name: {'packages': [], 'version': VERSION}, - brew.name: {'packages': [], 'version': VERSION}, + VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version}, + SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version}, + apt.name: {'packages': [], 'version': get_archivebox_version}, + brew.name: {'packages': [], 'version': get_archivebox_version}, } # @validate_call @@ -45,7 +46,7 @@ class ArchiveboxBinary(BaseBinary): ARCHIVEBOX_BINARY = ArchiveboxBinary() -class PythonBinary(BaseBinary): +class PythonBinary(Binary): name: BinName = 'python' binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] @@ -71,9 +72,9 @@ LOADED_SQLITE_PATH = Path(django.db.backends.sqlite3.base.__file__) LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version) LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) -class SqliteBinary(BaseBinary): +class SqliteBinary(Binary): name: BinName = 'sqlite' - binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) + binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) overrides: BinaryOverrides = { VENV_PIP_BINPROVIDER.name: { "abspath": LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None, @@ -93,10 +94,10 @@ class SqliteBinary(BaseBinary): cursor.execute('SELECT JSON(\'{"a": "b"}\')') except django_sqlite3.OperationalError as exc: print(f'[red][X] Your SQLite3 version is missing the required JSON1 extension: {exc}[/red]') - hint([ - 'Upgrade your Python version or install the extension manually:', - 'https://code.djangoproject.com/wiki/JSON1Extension' - ]) + print( + '[violet]Hint:[/violet] Upgrade your Python version or install the extension manually:\n' + + ' https://code.djangoproject.com/wiki/JSON1Extension\n' + ) return self # @validate_call @@ -114,10 +115,10 @@ LOADED_DJANGO_PATH = Path(django.__file__) LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3]) LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv and VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) -class DjangoBinary(BaseBinary): +class DjangoBinary(Binary): name: BinName = 'django' - binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) + binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) overrides: BinaryOverrides = { VENV_PIP_BINPROVIDER.name: { "abspath": LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None, @@ -139,7 +140,7 @@ class DjangoBinary(BaseBinary): DJANGO_BINARY = DjangoBinary() -class PipBinary(BaseBinary): +class PipBinary(Binary): name: BinName = "pip" binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] @@ -154,7 +155,7 @@ class 
PipBinary(BaseBinary): PIP_BINARY = PipBinary() -class PipxBinary(BaseBinary): +class PipxBinary(Binary): name: BinName = "pipx" binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] diff --git a/archivebox/plugins_pkg/pip/binproviders.py b/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py similarity index 76% rename from archivebox/plugins_pkg/pip/binproviders.py rename to packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py index e51dc780..1c245b62 100644 --- a/archivebox/plugins_pkg/pip/binproviders.py +++ b/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py @@ -1,21 +1,26 @@ -__package__ = 'plugins_pkg.pip' - import os import sys import site from pathlib import Path from typing import Optional +from benedict import benedict + from pydantic_pkgr import PipProvider, BinName, BinProviderName -from archivebox.config import CONSTANTS +import abx -from abx.archivebox.base_binary import BaseBinProvider +from abx_plugin_default_binproviders import get_BINPROVIDERS + +DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS()) +env = DEFAULT_BINPROVIDERS.env +apt = DEFAULT_BINPROVIDERS.apt +brew = DEFAULT_BINPROVIDERS.brew ###################### Config ########################## -class SystemPipBinProvider(PipProvider, BaseBinProvider): +class SystemPipBinProvider(PipProvider): name: BinProviderName = "sys_pip" INSTALLER_BIN: BinName = "pip" @@ -25,7 +30,7 @@ class SystemPipBinProvider(PipProvider, BaseBinProvider): # never modify system pip packages return 'refusing to install packages globally with system pip, use a venv instead' -class SystemPipxBinProvider(PipProvider, BaseBinProvider): +class SystemPipxBinProvider(PipProvider): name: BinProviderName = "pipx" INSTALLER_BIN: BinName = "pipx" @@ -34,7 +39,7 @@ class SystemPipxBinProvider(PipProvider, BaseBinProvider): IS_INSIDE_VENV = sys.prefix != sys.base_prefix -class VenvPipBinProvider(PipProvider, BaseBinProvider): +class VenvPipBinProvider(PipProvider): name: BinProviderName = "venv_pip" INSTALLER_BIN: BinName = "pip" @@ -45,18 +50,16 @@ class VenvPipBinProvider(PipProvider, BaseBinProvider): return None -class LibPipBinProvider(PipProvider, BaseBinProvider): +class LibPipBinProvider(PipProvider): name: BinProviderName = "lib_pip" INSTALLER_BIN: BinName = "pip" - pip_venv: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'pip' / 'venv' + pip_venv: Optional[Path] = Path('/usr/local/share/abx/pip/venv') def setup(self) -> None: - # update paths from config if they arent the default - from archivebox.config.common import STORAGE_CONFIG - if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR: - self.pip_venv = STORAGE_CONFIG.LIB_DIR / 'pip' / 'venv' - + # update venv path to match most up-to-date LIB_DIR based on runtime config + LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR + self.pip_venv = LIB_DIR / 'pip' / 'venv' super().setup() SYS_PIP_BINPROVIDER = SystemPipBinProvider() diff --git a/archivebox/plugins_pkg/pip/config.py b/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/config.py similarity index 100% rename from archivebox/plugins_pkg/pip/config.py rename to packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/config.py diff --git a/packages/abx-plugin-pip-binprovider/pyproject.toml b/packages/abx-plugin-pip-binprovider/pyproject.toml new file mode 100644 index 00000000..3f6364e0 --- /dev/null +++ b/packages/abx-plugin-pip-binprovider/pyproject.toml @@ 
-0,0 +1,22 @@ +[project] +name = "abx-plugin-pip-binprovider" +version = "2024.10.24" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic-pkgr>=0.5.4", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-plugin-default-binproviders>=2024.10.24", + "django>=5.0.0", +] + + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_pip_binprovider = "abx_plugin_pip_binprovider" diff --git a/packages/abx-plugin-playwright-binprovider/README.md b/packages/abx-plugin-playwright-binprovider/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_pkg/playwright/__init__.py b/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py similarity index 56% rename from archivebox/plugins_pkg/playwright/__init__.py rename to packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py index 0f66f42c..557f12c0 100644 --- a/archivebox/plugins_pkg/playwright/__init__.py +++ b/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py @@ -1,30 +1,18 @@ -__package__ = 'plugins_pkg.playwright' -__label__ = 'playwright' -__version__ = '2024.10.14' +__package__ = 'abx_plugin_playwright_binprovider' +__id__ = 'playwright' +__label__ = 'Playwright' __author__ = 'ArchiveBox' __homepage__ = 'https://github.com/microsoft/playwright-python' import abx -@abx.hookimpl -def get_PLUGIN(): - return { - 'playwright': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import PLAYWRIGHT_CONFIG return { - 'playwright': PLAYWRIGHT_CONFIG + __id__: PLAYWRIGHT_CONFIG } @abx.hookimpl diff --git a/archivebox/plugins_pkg/playwright/binaries.py b/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py similarity index 52% rename from archivebox/plugins_pkg/playwright/binaries.py rename to packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py index 0ef63646..333da054 100644 --- a/archivebox/plugins_pkg/playwright/binaries.py +++ b/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py @@ -1,20 +1,18 @@ -__package__ = 'plugins_pkg.playwright' +__package__ = 'abx_plugin_playwright_binprovider' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinName, BinProvider +from pydantic_pkgr import BinName, BinProvider, Binary -from abx.archivebox.base_binary import BaseBinary, env -from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER +from abx_plugin_pip_binprovider.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER +from abx_plugin_default_binproviders import env from .config import PLAYWRIGHT_CONFIG - - -class PlaywrightBinary(BaseBinary): +class PlaywrightBinary(Binary): name: BinName = PLAYWRIGHT_CONFIG.PLAYWRIGHT_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env] diff --git a/archivebox/plugins_pkg/playwright/binproviders.py b/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py similarity index 90% rename from archivebox/plugins_pkg/playwright/binproviders.py rename to 
packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py index 7d1238d5..8e472988 100644 --- a/archivebox/plugins_pkg/playwright/binproviders.py +++ b/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py @@ -1,6 +1,7 @@ -__package__ = 'plugins_pkg.playwright' +__package__ = 'abx_plugin_playwright_binprovider' import os +import shutil import platform from pathlib import Path from typing import List, Optional, Dict, ClassVar @@ -8,6 +9,7 @@ from typing import List, Optional, Dict, ClassVar from pydantic import computed_field, Field from pydantic_pkgr import ( BinName, + BinProvider, BinProviderName, BinProviderOverrides, InstallArgs, @@ -18,11 +20,8 @@ from pydantic_pkgr import ( DEFAULT_ENV_PATH, ) -from archivebox.config import CONSTANTS +import abx -from abx.archivebox.base_binary import BaseBinProvider, env - -from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER from .binaries import PLAYWRIGHT_BINARY @@ -31,11 +30,11 @@ MACOS_PLAYWRIGHT_CACHE_DIR: Path = Path("~/Library/Caches/ms-playwright") LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright") -class PlaywrightBinProvider(BaseBinProvider): +class PlaywrightBinProvider(BinProvider): name: BinProviderName = "playwright" INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name - PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}" + PATH: PATHStr = f"{Path('/usr/share/abx') / 'bin'}:{DEFAULT_ENV_PATH}" playwright_browsers_dir: Path = ( MACOS_PLAYWRIGHT_CACHE_DIR.expanduser() @@ -59,12 +58,12 @@ class PlaywrightBinProvider(BaseBinProvider): return None def setup(self) -> None: - # update paths from config if they arent the default - from archivebox.config.common import STORAGE_CONFIG - if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR: - self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}" + # update paths from config at runtime + LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR + + self.PATH = f"{LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}" - assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized" + assert shutil.which('pip'), "Pip bin provider not initialized" if self.playwright_browsers_dir: self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True) diff --git a/archivebox/plugins_pkg/playwright/config.py b/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/config.py similarity index 59% rename from archivebox/plugins_pkg/playwright/config.py rename to packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/config.py index 23f22efc..0c7c6a50 100644 --- a/archivebox/plugins_pkg/playwright/config.py +++ b/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/config.py @@ -1,7 +1,4 @@ -__package__ = 'playwright' - -from abx.archivebox.base_configset import BaseConfigSet - +from abx_spec_config import BaseConfigSet class PlaywrightConfigs(BaseConfigSet): PLAYWRIGHT_BINARY: str = 'playwright' diff --git a/packages/abx-plugin-playwright-binprovider/pyproject.toml b/packages/abx-plugin-playwright-binprovider/pyproject.toml new file mode 100644 index 00000000..a6c8937b --- /dev/null +++ b/packages/abx-plugin-playwright-binprovider/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "abx-plugin-playwright-binprovider" +version = "2024.10.24" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic>=2.4.2", + "pydantic-pkgr>=0.5.4", + 
"abx-spec-pydantic-pkgr>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_playwright_binprovider = "abx_plugin_playwright_binprovider" diff --git a/packages/abx-plugin-pocket-extractor/README.md b/packages/abx-plugin-pocket-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/pocket/__init__.py b/packages/abx-plugin-pocket-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/pocket/__init__.py rename to packages/abx-plugin-pocket-extractor/__init__.py diff --git a/archivebox/plugins_extractor/pocket/config.py b/packages/abx-plugin-pocket-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/pocket/config.py rename to packages/abx-plugin-pocket-extractor/config.py diff --git a/packages/abx-plugin-pocket-extractor/pyproject.toml b/packages/abx-plugin-pocket-extractor/pyproject.toml new file mode 100644 index 00000000..c9af2450 --- /dev/null +++ b/packages/abx-plugin-pocket-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-pocket-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-puppeteer-binprovider/README.md b/packages/abx-plugin-puppeteer-binprovider/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_pkg/puppeteer/__init__.py b/packages/abx-plugin-puppeteer-binprovider/__init__.py similarity index 100% rename from archivebox/plugins_pkg/puppeteer/__init__.py rename to packages/abx-plugin-puppeteer-binprovider/__init__.py diff --git a/archivebox/plugins_pkg/puppeteer/binaries.py b/packages/abx-plugin-puppeteer-binprovider/binaries.py similarity index 100% rename from archivebox/plugins_pkg/puppeteer/binaries.py rename to packages/abx-plugin-puppeteer-binprovider/binaries.py diff --git a/archivebox/plugins_pkg/puppeteer/binproviders.py b/packages/abx-plugin-puppeteer-binprovider/binproviders.py similarity index 96% rename from archivebox/plugins_pkg/puppeteer/binproviders.py rename to packages/abx-plugin-puppeteer-binprovider/binproviders.py index 2ef0eb7a..0fa9ca33 100644 --- a/archivebox/plugins_pkg/puppeteer/binproviders.py +++ b/packages/abx-plugin-puppeteer-binprovider/binproviders.py @@ -42,7 +42,8 @@ class PuppeteerBinProvider(BaseBinProvider): _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {} def setup(self) -> None: - # update paths from config + # update paths from config, don't do this lazily because we dont want to import archivebox.config.common at import-time + # we want to avoid depending on archivebox from abx code if at all possible from archivebox.config.common import STORAGE_CONFIG self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers' self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin') diff --git a/archivebox/plugins_pkg/puppeteer/config.py b/packages/abx-plugin-puppeteer-binprovider/config.py similarity index 100% rename from archivebox/plugins_pkg/puppeteer/config.py rename to packages/abx-plugin-puppeteer-binprovider/config.py diff --git a/packages/abx-plugin-puppeteer-binprovider/pyproject.toml b/packages/abx-plugin-puppeteer-binprovider/pyproject.toml new file mode 100644 index 00000000..e901ca88 --- /dev/null +++ b/packages/abx-plugin-puppeteer-binprovider/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-puppeteer-binprovider" +version = "0.1.0" 
+description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-readability-extractor/README.md b/packages/abx-plugin-readability-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/readability/__init__.py b/packages/abx-plugin-readability-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/readability/__init__.py rename to packages/abx-plugin-readability-extractor/__init__.py diff --git a/archivebox/plugins_extractor/readability/binaries.py b/packages/abx-plugin-readability-extractor/binaries.py similarity index 100% rename from archivebox/plugins_extractor/readability/binaries.py rename to packages/abx-plugin-readability-extractor/binaries.py diff --git a/archivebox/plugins_extractor/readability/config.py b/packages/abx-plugin-readability-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/readability/config.py rename to packages/abx-plugin-readability-extractor/config.py diff --git a/archivebox/plugins_extractor/readability/extractors.py b/packages/abx-plugin-readability-extractor/extractors.py similarity index 100% rename from archivebox/plugins_extractor/readability/extractors.py rename to packages/abx-plugin-readability-extractor/extractors.py diff --git a/packages/abx-plugin-readability-extractor/pyproject.toml b/packages/abx-plugin-readability-extractor/pyproject.toml new file mode 100644 index 00000000..5caa0adb --- /dev/null +++ b/packages/abx-plugin-readability-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-readability-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-readwise-extractor/README.md b/packages/abx-plugin-readwise-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/readwise/__init__.py b/packages/abx-plugin-readwise-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/readwise/__init__.py rename to packages/abx-plugin-readwise-extractor/__init__.py diff --git a/archivebox/plugins_extractor/readwise/config.py b/packages/abx-plugin-readwise-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/readwise/config.py rename to packages/abx-plugin-readwise-extractor/config.py diff --git a/packages/abx-plugin-readwise-extractor/pyproject.toml b/packages/abx-plugin-readwise-extractor/pyproject.toml new file mode 100644 index 00000000..7df49b56 --- /dev/null +++ b/packages/abx-plugin-readwise-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-readwise-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-ripgrep-search/README.md b/packages/abx-plugin-ripgrep-search/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_search/ripgrep/__init__.py b/packages/abx-plugin-ripgrep-search/__init__.py similarity index 100% rename from archivebox/plugins_search/ripgrep/__init__.py rename to packages/abx-plugin-ripgrep-search/__init__.py diff --git a/archivebox/plugins_search/ripgrep/binaries.py b/packages/abx-plugin-ripgrep-search/binaries.py similarity index 100% rename from archivebox/plugins_search/ripgrep/binaries.py rename to 
packages/abx-plugin-ripgrep-search/binaries.py diff --git a/archivebox/plugins_search/ripgrep/config.py b/packages/abx-plugin-ripgrep-search/config.py similarity index 100% rename from archivebox/plugins_search/ripgrep/config.py rename to packages/abx-plugin-ripgrep-search/config.py diff --git a/packages/abx-plugin-ripgrep-search/pyproject.toml b/packages/abx-plugin-ripgrep-search/pyproject.toml new file mode 100644 index 00000000..c79821d1 --- /dev/null +++ b/packages/abx-plugin-ripgrep-search/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-ripgrep-search" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/archivebox/plugins_search/ripgrep/searchbackend.py b/packages/abx-plugin-ripgrep-search/searchbackend.py similarity index 100% rename from archivebox/plugins_search/ripgrep/searchbackend.py rename to packages/abx-plugin-ripgrep-search/searchbackend.py diff --git a/packages/abx-plugin-singlefile-extractor/README.md b/packages/abx-plugin-singlefile-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/singlefile/__init__.py b/packages/abx-plugin-singlefile-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/singlefile/__init__.py rename to packages/abx-plugin-singlefile-extractor/__init__.py diff --git a/archivebox/plugins_extractor/singlefile/binaries.py b/packages/abx-plugin-singlefile-extractor/binaries.py similarity index 100% rename from archivebox/plugins_extractor/singlefile/binaries.py rename to packages/abx-plugin-singlefile-extractor/binaries.py diff --git a/archivebox/plugins_extractor/singlefile/config.py b/packages/abx-plugin-singlefile-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/singlefile/config.py rename to packages/abx-plugin-singlefile-extractor/config.py diff --git a/archivebox/plugins_extractor/singlefile/extractors.py b/packages/abx-plugin-singlefile-extractor/extractors.py similarity index 100% rename from archivebox/plugins_extractor/singlefile/extractors.py rename to packages/abx-plugin-singlefile-extractor/extractors.py diff --git a/archivebox/plugins_extractor/singlefile/models.py b/packages/abx-plugin-singlefile-extractor/models.py similarity index 100% rename from archivebox/plugins_extractor/singlefile/models.py rename to packages/abx-plugin-singlefile-extractor/models.py diff --git a/packages/abx-plugin-singlefile-extractor/pyproject.toml b/packages/abx-plugin-singlefile-extractor/pyproject.toml new file mode 100644 index 00000000..b0c9df1b --- /dev/null +++ b/packages/abx-plugin-singlefile-extractor/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-singlefile-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/packages/abx-plugin-sonic-search/README.md b/packages/abx-plugin-sonic-search/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_search/sonic/__init__.py b/packages/abx-plugin-sonic-search/__init__.py similarity index 100% rename from archivebox/plugins_search/sonic/__init__.py rename to packages/abx-plugin-sonic-search/__init__.py diff --git a/archivebox/plugins_search/sonic/binaries.py b/packages/abx-plugin-sonic-search/binaries.py similarity index 100% rename from archivebox/plugins_search/sonic/binaries.py rename to packages/abx-plugin-sonic-search/binaries.py diff --git 
a/archivebox/plugins_search/sonic/config.py b/packages/abx-plugin-sonic-search/config.py similarity index 100% rename from archivebox/plugins_search/sonic/config.py rename to packages/abx-plugin-sonic-search/config.py diff --git a/packages/abx-plugin-sonic-search/pyproject.toml b/packages/abx-plugin-sonic-search/pyproject.toml new file mode 100644 index 00000000..a61d17c7 --- /dev/null +++ b/packages/abx-plugin-sonic-search/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-sonic-search" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/archivebox/plugins_search/sonic/searchbackend.py b/packages/abx-plugin-sonic-search/searchbackend.py similarity index 100% rename from archivebox/plugins_search/sonic/searchbackend.py rename to packages/abx-plugin-sonic-search/searchbackend.py diff --git a/packages/abx-plugin-sqlitefts-search/README.md b/packages/abx-plugin-sqlitefts-search/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_search/sqlitefts/__init__.py b/packages/abx-plugin-sqlitefts-search/__init__.py similarity index 100% rename from archivebox/plugins_search/sqlitefts/__init__.py rename to packages/abx-plugin-sqlitefts-search/__init__.py diff --git a/archivebox/plugins_search/sqlitefts/config.py b/packages/abx-plugin-sqlitefts-search/config.py similarity index 100% rename from archivebox/plugins_search/sqlitefts/config.py rename to packages/abx-plugin-sqlitefts-search/config.py diff --git a/packages/abx-plugin-sqlitefts-search/pyproject.toml b/packages/abx-plugin-sqlitefts-search/pyproject.toml new file mode 100644 index 00000000..f635fb16 --- /dev/null +++ b/packages/abx-plugin-sqlitefts-search/pyproject.toml @@ -0,0 +1,7 @@ +[project] +name = "abx-sqlitefts-search" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] diff --git a/archivebox/plugins_search/sqlitefts/searchbackend.py b/packages/abx-plugin-sqlitefts-search/searchbackend.py similarity index 100% rename from archivebox/plugins_search/sqlitefts/searchbackend.py rename to packages/abx-plugin-sqlitefts-search/searchbackend.py diff --git a/packages/abx-plugin-wget-extractor/README.md b/packages/abx-plugin-wget-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/wget/__init__.py b/packages/abx-plugin-wget-extractor/__init__.py similarity index 100% rename from archivebox/plugins_extractor/wget/__init__.py rename to packages/abx-plugin-wget-extractor/__init__.py diff --git a/archivebox/plugins_extractor/wget/binaries.py b/packages/abx-plugin-wget-extractor/binaries.py similarity index 100% rename from archivebox/plugins_extractor/wget/binaries.py rename to packages/abx-plugin-wget-extractor/binaries.py diff --git a/archivebox/plugins_extractor/wget/config.py b/packages/abx-plugin-wget-extractor/config.py similarity index 100% rename from archivebox/plugins_extractor/wget/config.py rename to packages/abx-plugin-wget-extractor/config.py diff --git a/archivebox/plugins_extractor/wget/extractors.py b/packages/abx-plugin-wget-extractor/extractors.py similarity index 100% rename from archivebox/plugins_extractor/wget/extractors.py rename to packages/abx-plugin-wget-extractor/extractors.py diff --git a/packages/abx-plugin-wget-extractor/pyproject.toml b/packages/abx-plugin-wget-extractor/pyproject.toml new file mode 100644 index 00000000..21445c18 --- /dev/null +++ 
b/packages/abx-plugin-wget-extractor/pyproject.toml
@@ -0,0 +1,7 @@
+[project]
+name = "abx-wget-extractor"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = []
diff --git a/archivebox/plugins_extractor/wget/wget_util.py b/packages/abx-plugin-wget-extractor/wget_util.py
similarity index 100%
rename from archivebox/plugins_extractor/wget/wget_util.py
rename to packages/abx-plugin-wget-extractor/wget_util.py
diff --git a/packages/abx-plugin-ytdlp-extractor/README.md b/packages/abx-plugin-ytdlp-extractor/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/plugins_extractor/ytdlp/__init__.py b/packages/abx-plugin-ytdlp-extractor/__init__.py
similarity index 100%
rename from archivebox/plugins_extractor/ytdlp/__init__.py
rename to packages/abx-plugin-ytdlp-extractor/__init__.py
diff --git a/archivebox/plugins_extractor/ytdlp/binaries.py b/packages/abx-plugin-ytdlp-extractor/binaries.py
similarity index 100%
rename from archivebox/plugins_extractor/ytdlp/binaries.py
rename to packages/abx-plugin-ytdlp-extractor/binaries.py
diff --git a/archivebox/plugins_extractor/ytdlp/config.py b/packages/abx-plugin-ytdlp-extractor/config.py
similarity index 100%
rename from archivebox/plugins_extractor/ytdlp/config.py
rename to packages/abx-plugin-ytdlp-extractor/config.py
diff --git a/packages/abx-plugin-ytdlp-extractor/pyproject.toml b/packages/abx-plugin-ytdlp-extractor/pyproject.toml
new file mode 100644
index 00000000..1b6b4e30
--- /dev/null
+++ b/packages/abx-plugin-ytdlp-extractor/pyproject.toml
@@ -0,0 +1,7 @@
+[project]
+name = "abx-ytdlp-extractor"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = []
diff --git a/packages/abx-spec-archivebox/README.md b/packages/abx-spec-archivebox/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py b/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py
new file mode 100644
index 00000000..5b646bf9
--- /dev/null
+++ b/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py
@@ -0,0 +1,7 @@
+__package__ = 'abx_spec_archivebox'
+
+# from .effects import *
+# from .events import *
+# from .reads import *
+# from .writes import *
+# from .states import *
diff --git a/archivebox/abx/archivebox/effects.py b/packages/abx-spec-archivebox/abx_spec_archivebox/effects.py
similarity index 100%
rename from archivebox/abx/archivebox/effects.py
rename to packages/abx-spec-archivebox/abx_spec_archivebox/effects.py
diff --git a/archivebox/abx/archivebox/events.py b/packages/abx-spec-archivebox/abx_spec_archivebox/events.py
similarity index 100%
rename from archivebox/abx/archivebox/events.py
rename to packages/abx-spec-archivebox/abx_spec_archivebox/events.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/reads.py b/packages/abx-spec-archivebox/abx_spec_archivebox/reads.py
new file mode 100644
index 00000000..30d6667d
--- /dev/null
+++ b/packages/abx-spec-archivebox/abx_spec_archivebox/reads.py
@@ -0,0 +1,33 @@
+__package__ = 'abx_spec_archivebox'
+
+
+from benedict import benedict
+
+
+def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
+    """Get all the relevant config for the given scope, in correct precedence order"""
+
+    from django.conf import settings
+    default_config: benedict = defaults or settings.CONFIG
+
+    snapshot = snapshot or (archiveresult and archiveresult.snapshot)
+    crawl = crawl or (snapshot and snapshot.crawl)
+    seed = seed or (crawl and crawl.seed)
+    persona = persona or (crawl and crawl.persona)
+
+    persona_config = persona.config if persona else {}
+    seed_config = seed.config if seed else {}
+    crawl_config = crawl.config if crawl else {}
+    snapshot_config = snapshot.config if snapshot else {}
+    archiveresult_config = archiveresult.config if archiveresult else {}
+    extra_config = extra_config or {}
+
+    return benedict({
+        **default_config,          # defaults / config file / environment variables
+        **persona_config,          # lowest precedence
+        **seed_config,
+        **crawl_config,
+        **snapshot_config,
+        **archiveresult_config,
+        **extra_config,            # highest precedence
+    })
diff --git a/archivebox/abx/archivebox/states.py b/packages/abx-spec-archivebox/abx_spec_archivebox/states.py
similarity index 100%
rename from archivebox/abx/archivebox/states.py
rename to packages/abx-spec-archivebox/abx_spec_archivebox/states.py
diff --git a/archivebox/abx/archivebox/writes.py b/packages/abx-spec-archivebox/abx_spec_archivebox/writes.py
similarity index 100%
rename from archivebox/abx/archivebox/writes.py
rename to packages/abx-spec-archivebox/abx_spec_archivebox/writes.py
diff --git a/packages/abx-spec-archivebox/pyproject.toml b/packages/abx-spec-archivebox/pyproject.toml
new file mode 100644
index 00000000..349698a7
--- /dev/null
+++ b/packages/abx-spec-archivebox/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-spec-archivebox"
+version = "0.1.0"
+description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "django>=5.1.1,<6.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_archivebox = "abx_spec_archivebox"
diff --git a/packages/abx-spec-config/abx_spec_config/__init__.py b/packages/abx-spec-config/abx_spec_config/__init__.py
new file mode 100644
index 00000000..cc840381
--- /dev/null
+++ b/packages/abx-spec-config/abx_spec_config/__init__.py
@@ -0,0 +1,50 @@
+import os
+from pathlib import Path
+from typing import Dict, Any
+
+from benedict import benedict
+
+
+import abx
+
+from .base_configset import BaseConfigSet, ConfigKeyStr
+
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def get_collection_config_path() -> Path:
+    return Path(os.getcwd()) / "ArchiveBox.conf"
+
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def get_system_config_path() -> Path:
+    return Path('~/.config/abx/abx.conf').expanduser()
+
+
+@abx.hookspec
+@abx.hookimpl
+def get_CONFIG() -> Dict[abx.PluginId, BaseConfigSet]:
+    """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
+    return {}
+
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def get_CONFIGS() -> Dict[abx.PluginId, BaseConfigSet]:
+    """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
+    return abx.as_dict(abx.pm.hook.get_CONFIG())
+
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def get_FLAT_CONFIG() -> Dict[ConfigKeyStr, Any]:
+    """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
+    return benedict({
+        key: value
+        for configset in get_CONFIGS().values()
+        for key, value in benedict(configset).items()
+    })
+
+
+# TODO: add read_config_file(), write_config_file() hooks
diff --git
a/archivebox/abx/archivebox/base_configset.py b/packages/abx-spec-config/abx_spec_config/base_configset.py similarity index 73% rename from archivebox/abx/archivebox/base_configset.py rename to packages/abx-spec-config/abx_spec_config/base_configset.py index 706b9df8..434db331 100644 --- a/archivebox/abx/archivebox/base_configset.py +++ b/packages/abx-spec-config/abx_spec_config/base_configset.py @@ -1,36 +1,32 @@ -__package__ = 'abx.archivebox' +__package__ = 'abx_spec_config' import os import sys import re from pathlib import Path from typing import Type, Tuple, Callable, ClassVar, Dict, Any +from typing_extensions import Annotated import toml from rich import print from benedict import benedict -from pydantic import model_validator, TypeAdapter, AliasChoices +from pydantic import model_validator, TypeAdapter, AliasChoices, AfterValidator from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource from pydantic_settings.sources import TomlConfigSettingsSource -from pydantic_pkgr import func_takes_args_or_kwargs - +import abx from . import toml_util -PACKAGE_DIR = Path(__file__).resolve().parent.parent -DATA_DIR = Path(os.getcwd()).resolve() - -ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf" -ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak" - AUTOFIXES_HEADER = "[AUTOFIXES]" AUTOFIXES_SUBHEADER = "# The following config was added automatically to fix problems detected at startup:" _ALREADY_WARNED_ABOUT_UPDATED_CONFIG = set() +ConfigKeyStr = Annotated[str, AfterValidator(lambda x: x.isidentifier() and x.isupper() and not x.startswith('_'))] + class FlatTomlConfigSettingsSource(TomlConfigSettingsSource): """ @@ -98,9 +94,10 @@ class BaseConfigSet(BaseSettings): revalidate_instances="subclass-instances", ) - load_from_defaults: ClassVar[bool] = True - load_from_collection: ClassVar[bool] = True - load_from_environment: ClassVar[bool] = True + load_from_defaults: ClassVar[bool] = True # read from schema defaults + load_from_system: ClassVar[bool] = True # read from ~/.config/abx/abx.conf + load_from_collection: ClassVar[bool] = True # read from ./ArchiveBox.conf + load_from_environment: ClassVar[bool] = True # read from environment variables @classmethod def settings_customise_sources( @@ -115,49 +112,41 @@ class BaseConfigSet(BaseSettings): # import ipdb; ipdb.set_trace() - precedence_order = {} + default_configs = [init_settings] if cls.load_from_defaults else [] + system_configs = [] + collection_configs = [] + environment_configs = [env_settings] if cls.load_from_environment else [] - # if ArchiveBox.conf does not exist yet, return defaults -> env order - if not ARCHIVEBOX_CONFIG_FILE.is_file(): - precedence_order = { - 'defaults': init_settings, - 'environment': env_settings, - } + # load system config from ~/.config/abx/abx.conf + SYSTEM_CONFIG_FILE = abx.pm.hook.get_system_config_path() + if cls.load_from_system and os.path.isfile(SYSTEM_CONFIG_FILE): + try: + system_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=SYSTEM_CONFIG_FILE)] + except Exception as err: + if err.__class__.__name__ == "TOMLDecodeError": + convert_ini_to_toml(SYSTEM_CONFIG_FILE) + system_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=SYSTEM_CONFIG_FILE)] + else: + raise + + COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path() + if cls.load_from_collection and os.path.isfile(COLLECTION_CONFIG_FILE): + try: + collection_configs = [FlatTomlConfigSettingsSource(settings_cls, 
toml_file=COLLECTION_CONFIG_FILE)] + except Exception as err: + if err.__class__.__name__ == "TOMLDecodeError": + convert_ini_to_toml(COLLECTION_CONFIG_FILE) + collection_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=COLLECTION_CONFIG_FILE)] + else: + raise - # if ArchiveBox.conf exists and is in TOML format, return default -> TOML -> env order - try: - precedence_order = precedence_order or { - 'defaults': init_settings, - # 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), - 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), - 'environment': env_settings, - } - except Exception as err: - if err.__class__.__name__ != "TOMLDecodeError": - raise - # if ArchiveBox.conf exists and is in INI format, convert it then return default -> TOML -> env order - - # Convert ArchiveBox.conf in INI format to TOML and save original to .ArchiveBox.bak - original_ini = ARCHIVEBOX_CONFIG_FILE.read_text() - ARCHIVEBOX_CONFIG_FILE_BAK.write_text(original_ini) - new_toml = toml_util.convert(original_ini) - ARCHIVEBOX_CONFIG_FILE.write_text(new_toml) - - precedence_order = { - 'defaults': init_settings, - # 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), - 'collection': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), - 'environment': env_settings, - } - - if not cls.load_from_environment: - precedence_order.pop('environment') - if not cls.load_from_collection: - precedence_order.pop('collection') - if not cls.load_from_defaults: - precedence_order.pop('defaults') - - return tuple(precedence_order.values()) + precedence_order = [ + *default_configs, + *system_configs, + *collection_configs, + *environment_configs, + ] + return tuple(precedence_order) @model_validator(mode="after") def fill_defaults(self): @@ -175,7 +164,7 @@ class BaseConfigSet(BaseSettings): """Manual validation method, to be called from plugin/__init__.py:get_CONFIG()""" pass - def get_default_value(self, key): + def get_default_value(self, key: ConfigKeyStr): """Get the default value for a given config key""" field = self.model_fields[key] value = getattr(self, key) @@ -204,7 +193,9 @@ class BaseConfigSet(BaseSettings): Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it. SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue. 
""" - from archivebox.misc.toml_util import CustomTOMLEncoder + + COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path() + # SYSTEM_CONFIG_FILE = abx.pm.hook.get_system_config_path() # silence warnings if they've already been shown once if all(key in _ALREADY_WARNED_ABOUT_UPDATED_CONFIG for key in kwargs.keys()): @@ -224,10 +215,10 @@ class BaseConfigSet(BaseSettings): # if persist=True, write config changes to data/ArchiveBox.conf [AUTOFIXES] section try: - if persist and ARCHIVEBOX_CONFIG_FILE.is_file(): - autofixes_to_add = benedict(kwargs).to_toml(encoder=CustomTOMLEncoder()) + if persist and COLLECTION_CONFIG_FILE.is_file(): + autofixes_to_add = benedict(kwargs).to_toml(encoder=toml_util.CustomTOMLEncoder()) - existing_config = ARCHIVEBOX_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip() + existing_config = COLLECTION_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip() if AUTOFIXES_HEADER in existing_config: existing_autofixes = existing_config.split(AUTOFIXES_HEADER, 1)[-1].strip().replace(AUTOFIXES_SUBHEADER, '').replace(AUTOFIXES_HEADER, '').strip() else: @@ -240,7 +231,7 @@ class BaseConfigSet(BaseSettings): existing_autofixes, autofixes_to_add, ] if line.strip()).strip() + '\n' - ARCHIVEBOX_CONFIG_FILE.write_text(new_config) + COLLECTION_CONFIG_FILE.write_text(new_config) except Exception: pass self.__init__() @@ -250,7 +241,7 @@ class BaseConfigSet(BaseSettings): return self @property - def aliases(self) -> Dict[str, str]: + def aliases(self) -> Dict[ConfigKeyStr, ConfigKeyStr]: alias_map = {} for key, field in self.model_fields.items(): alias_map[key] = key @@ -276,7 +267,7 @@ class BaseConfigSet(BaseSettings): return re.sub('([A-Z]+)', r'_\1', class_name).upper().strip('_') - def from_defaults(self) -> Dict[str, Any]: + def from_defaults(self) -> Dict[ConfigKeyStr, Any]: """Get the dictionary of {key: value} config loaded from the default values""" class OnlyDefaultsConfig(self.__class__): load_from_defaults = True @@ -284,7 +275,7 @@ class BaseConfigSet(BaseSettings): load_from_environment = False return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) - def from_collection(self) -> Dict[str, Any]: + def from_collection(self) -> Dict[ConfigKeyStr, Any]: """Get the dictionary of {key: value} config loaded from the collection ArchiveBox.conf""" class OnlyConfigFileConfig(self.__class__): load_from_defaults = False @@ -292,7 +283,7 @@ class BaseConfigSet(BaseSettings): load_from_environment = False return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) - def from_environment(self) -> Dict[str, Any]: + def from_environment(self) -> Dict[ConfigKeyStr, Any]: """Get the dictionary of {key: value} config loaded from the environment variables""" class OnlyEnvironmentConfig(self.__class__): load_from_defaults = False @@ -300,12 +291,12 @@ class BaseConfigSet(BaseSettings): load_from_environment = True return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) - def from_computed(self) -> Dict[str, Any]: + def from_computed(self) -> Dict[ConfigKeyStr, Any]: """Get the dictionary of {key: value} config loaded from the computed fields""" return benedict(self.model_dump(include=set(self.model_computed_fields.keys()))) - def to_toml_dict(self, defaults=False) -> Dict[str, Any]: + def to_toml_dict(self, 
defaults=False) -> Dict[ConfigKeyStr, Any]: """Get the current config as a TOML-ready dict""" config_dict = {} for key, value in benedict(self).items(): @@ -325,10 +316,24 @@ class BaseConfigSet(BaseSettings): return toml.dumps(toml_dict, encoder=CustomTOMLEncoder()) - def as_legacy_config_schema(self) -> Dict[str, Any]: - # shim for backwards compatibility with old config schema style - model_values = self.model_dump() - return benedict({ - key: {'type': field.annotation, 'default': model_values[key]} - for key, field in self.model_fields.items() - }) + + +def func_takes_args_or_kwargs(lambda_func: Callable[..., Any]) -> bool: + """returns True if a lambda func takes args/kwargs of any kind, otherwise false if it's pure/argless""" + code = lambda_func.__code__ + has_args = code.co_argcount > 0 + has_varargs = code.co_flags & 0x04 != 0 + has_varkw = code.co_flags & 0x08 != 0 + return has_args or has_varargs or has_varkw + + + + +def convert_ini_to_toml(ini_file: Path): + """Convert an INI file to a TOML file, saving the original to .ORIGINALNAME.bak""" + + bak_path = ini_file.parent / f'.{ini_file.name}.bak' + original_ini = ini_file.read_text() + bak_path.write_text(original_ini) + new_toml = toml_util.convert(original_ini) + ini_file.write_text(new_toml) diff --git a/archivebox/abx/archivebox/toml_util.py b/packages/abx-spec-config/abx_spec_config/toml_util.py similarity index 100% rename from archivebox/abx/archivebox/toml_util.py rename to packages/abx-spec-config/abx_spec_config/toml_util.py diff --git a/packages/abx-spec-config/pyproject.toml b/packages/abx-spec-config/pyproject.toml new file mode 100644 index 00000000..b85f675e --- /dev/null +++ b/packages/abx-spec-config/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-spec-config" +version = "0.0.1" +dependencies = [ + "abx>=0.1.0", + "python-benedict>=0.34.0", + "pydantic>=2.9.2", + "pydantic-settings>=2.6.0", + "rich>=13.9.3", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_config = "abx_spec_config" diff --git a/packages/abx-spec-django/README.md b/packages/abx-spec-django/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/abx/django/hookspec.py b/packages/abx-spec-django/abx_spec_django/__init__.py similarity index 79% rename from archivebox/abx/django/hookspec.py rename to packages/abx-spec-django/abx_spec_django/__init__.py index 87f8e520..20f62d2b 100644 --- a/archivebox/abx/django/hookspec.py +++ b/packages/abx-spec-django/abx_spec_django/__init__.py @@ -1,17 +1,16 @@ -__package__ = 'abx.django' - -from ..hookspec import hookspec - +import abx ########################################################################################### -@hookspec +@abx.hookspec +@abx.hookimpl def get_INSTALLED_APPS(): """Return a list of apps to add to INSTALLED_APPS""" # e.g. ['your_plugin_type.plugin_name'] - return [] + return ['abx_spec_django'] -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_INSTALLED_APPS(INSTALLED_APPS): # """Mutate INSTALLED_APPS in place to add your app in a specific position""" # # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth') @@ -19,72 +18,85 @@ def get_INSTALLED_APPS(): # pass -@hookspec +@abx.hookspec +@abx.hookimpl def get_TEMPLATE_DIRS(): return [] # e.g. ['your_plugin_type/plugin_name/templates'] -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_TEMPLATE_DIRS(TEMPLATE_DIRS): # """Install django settings""" # # e.g. 
TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates') # pass -@hookspec +@abx.hookspec +@abx.hookimpl def get_STATICFILES_DIRS(): return [] # e.g. ['your_plugin_type/plugin_name/static'] -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_STATICFILES_DIRS(STATICFILES_DIRS): # """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position""" # # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static') # pass -@hookspec -def get_MIDDLEWARE(): +@abx.hookspec +@abx.hookimpl +def get_MIDDLEWARES(): return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware'] -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_MIDDLEWARE(MIDDLEWARE): # """Mutate MIDDLEWARE in place to add your middleware in a specific position""" # # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware') # pass -@hookspec +@abx.hookspec +@abx.hookimpl def get_AUTHENTICATION_BACKENDS(): return [] # e.g. ['django_auth_ldap.backend.LDAPBackend'] -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): # """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position""" # # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend') # pass -@hookspec +@abx.hookspec +@abx.hookimpl def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME): - return [] # e.g. [{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}] + return {} # e.g. {'some_queue_name': {'filename': 'some_queue_name.sqlite3', 'store_none': True, 'results': True, ...}} -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_DJANGO_HUEY(DJANGO_HUEY): # """Mutate DJANGO_HUEY in place to add your huey queues in a specific position""" # # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value' # pass -@hookspec +@abx.hookspec +@abx.hookimpl def get_ADMIN_DATA_VIEWS_URLS(): return [] -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): # """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position""" # # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py') # pass -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_settings(settings): # """Mutate settings in place to add your settings / modify existing settings""" # # settings.SOME_KEY = 'some_value' @@ -93,11 +105,13 @@ def get_ADMIN_DATA_VIEWS_URLS(): ########################################################################################### -@hookspec +@abx.hookspec +@abx.hookimpl def get_urlpatterns(): return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)] -# @hookspec +# @abx.hookspec +# @abx.hookimpl # def register_urlpatterns(urlpatterns): # """Mutate urlpatterns in place to add your urlpatterns in a specific position""" # # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view)) @@ -105,21 +119,22 @@ def get_urlpatterns(): ########################################################################################### -@hookspec -def register_checks(): - """Register django checks with django system checks system""" - pass -@hookspec + +@abx.hookspec +@abx.hookimpl def register_admin(admin_site): """Register django admin views/models with the main django admin site instance""" + # e.g. 
admin_site.register(your_model, your_admin_class) pass ########################################################################################### -@hookspec +@abx.hookspec +@abx.hookimpl def ready(): """Called when Django apps app.ready() are triggered""" + # e.g. abx.pm.hook.get_CONFIG().ytdlp.validate() pass diff --git a/archivebox/abx/django/apps.py b/packages/abx-spec-django/abx_spec_django/apps.py similarity index 71% rename from archivebox/abx/django/apps.py rename to packages/abx-spec-django/abx_spec_django/apps.py index 085647c1..667b74c0 100644 --- a/archivebox/abx/django/apps.py +++ b/packages/abx-spec-django/abx_spec_django/apps.py @@ -1,13 +1,14 @@ -__package__ = 'abx.django' +__package__ = 'abx_spec_django' from django.apps import AppConfig +import abx + class ABXConfig(AppConfig): - name = 'abx' + name = 'abx_spec_django' def ready(self): - import abx from django.conf import settings abx.pm.hook.ready(settings=settings) diff --git a/packages/abx-spec-django/pyproject.toml b/packages/abx-spec-django/pyproject.toml new file mode 100644 index 00000000..09ed31ff --- /dev/null +++ b/packages/abx-spec-django/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-spec-django" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "django>=5.1.1,<6.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_django = "abx_spec_django" diff --git a/packages/abx-spec-extractor/README.md b/packages/abx-spec-extractor/README.md new file mode 100644 index 00000000..e69de29b diff --git a/packages/abx-spec-extractor/abx_spec_extractor.py b/packages/abx-spec-extractor/abx_spec_extractor.py new file mode 100644 index 00000000..74659467 --- /dev/null +++ b/packages/abx-spec-extractor/abx_spec_extractor.py @@ -0,0 +1,211 @@ +import os + +from typing import Optional, List, Annotated, Tuple +from pathlib import Path + +from pydantic import AfterValidator +from pydantic_pkgr import BinName + + +import abx + + +def assert_no_empty_args(args: List[str]) -> List[str]: + assert all(len(arg) for arg in args) + return args + +ExtractorName = Annotated[str, AfterValidator(lambda s: s.isidentifier())] + +HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))] +CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)] + + +@abx.hookspec +@abx.hookimpl +def get_EXTRACTORS(): + return [] + +@abx.hookspec +@abx.hookimpl +def extract(uri: str, config: dict | None=None): + return {} + +@abx.hookspec(firstresult=True) +@abx.hookimpl(trylast=True) +def should_extract(uri: str, extractor: str, config: dict | None=None): + return False + + +class BaseExtractor: + name: ExtractorName + binary: BinName + + default_args: CmdArgsList = [] + extra_args: CmdArgsList = [] + + def get_output_path(self, snapshot) -> Path: + return Path(self.__class__.__name__.lower()) + + def should_extract(self, uri: str, config: dict | None=None) -> bool: + try: + assert self.detect_installed_binary().version + except Exception: + raise + # could not load binary + return False + + # output_dir = self.get_output_path(snapshot) + # if output_dir.glob('*.*'): + # return False + return True + + # @abx.hookimpl + # def extract(self, snapshot_id: str) -> Dict[str, Any]: + # from core.models import Snapshot + # from archivebox import CONSTANTS + + # snapshot = Snapshot.objects.get(id=snapshot_id) + + # if not 
self.should_extract(snapshot.url): + # return {} + + # status = 'failed' + # start_ts = timezone.now() + # uplink = self.detect_network_interface() + # installed_binary = self.detect_installed_binary() + # machine = installed_binary.machine + # assert uplink.machine == installed_binary.machine # it would be *very* weird if this wasn't true + + # output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid) + # output_dir.mkdir(parents=True, exist_ok=True) + + # # execute the extractor binary with the given args + # args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args] + # cmd = [str(installed_binary.abspath), *args] + # proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir) + + # # collect the output + # end_ts = timezone.now() + # output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*')) + # stdout = proc.stdout.strip() + # stderr = proc.stderr.strip() + # output_json = None + # output_text = stdout + # try: + # output_json = json.loads(stdout.strip()) + # output_text = None + # except json.JSONDecodeError: + # pass + + # errors = [] + # if proc.returncode == 0: + # status = 'success' + # else: + # errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}') + + # # increment health stats counters + # if status == 'success': + # machine.record_health_success() + # uplink.record_health_success() + # installed_binary.record_health_success() + # else: + # machine.record_health_failure() + # uplink.record_health_failure() + # installed_binary.record_health_failure() + + # return { + # 'extractor': self.name, + + # 'snapshot': { + # 'id': snapshot.id, + # 'abid': snapshot.abid, + # 'url': snapshot.url, + # 'created_by_id': snapshot.created_by_id, + # }, + + # 'machine': { + # 'id': machine.id, + # 'abid': machine.abid, + # 'guid': machine.guid, + # 'hostname': machine.hostname, + # 'hw_in_docker': machine.hw_in_docker, + # 'hw_in_vm': machine.hw_in_vm, + # 'hw_manufacturer': machine.hw_manufacturer, + # 'hw_product': machine.hw_product, + # 'hw_uuid': machine.hw_uuid, + # 'os_arch': machine.os_arch, + # 'os_family': machine.os_family, + # 'os_platform': machine.os_platform, + # 'os_release': machine.os_release, + # 'os_kernel': machine.os_kernel, + # }, + + # 'uplink': { + # 'id': uplink.id, + # 'abid': uplink.abid, + # 'mac_address': uplink.mac_address, + # 'ip_public': uplink.ip_public, + # 'ip_local': uplink.ip_local, + # 'dns_server': uplink.dns_server, + # 'hostname': uplink.hostname, + # 'iface': uplink.iface, + # 'isp': uplink.isp, + # 'city': uplink.city, + # 'region': uplink.region, + # 'country': uplink.country, + # }, + + # 'binary': { + # 'id': installed_binary.id, + # 'abid': installed_binary.abid, + # 'name': installed_binary.name, + # 'binprovider': installed_binary.binprovider, + # 'abspath': installed_binary.abspath, + # 'version': installed_binary.version, + # 'sha256': installed_binary.sha256, + # }, + + # 'cmd': cmd, + # 'stdout': stdout, + # 'stderr': stderr, + # 'returncode': proc.returncode, + # 'start_ts': start_ts, + # 'end_ts': end_ts, + + # 'status': status, + # 'errors': errors, + # 'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)), + # 'output_files': output_files, + # 'output_json': output_json or {}, + # 'output_text': output_text or '', + # } + + # TODO: move this to a hookimpl + def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None): + cwd = cwd 
or Path(os.getcwd())
+        binary = self.load_binary(installed_binary=installed_binary)
+
+        return binary.exec(cmd=args, cwd=cwd)
+
+    # @cached_property
+    @property
+    def BINARY(self):
+        # import abx.archivebox.reads
+        # for binary in abx.archivebox.reads.get_BINARIES().values():
+        #     if binary.name == self.binary:
+        #         return binary
+        raise ValueError(f'Binary {self.binary} not found')
+
+    def detect_installed_binary(self):
+        from machine.models import InstalledBinary
+        # hydrates binary from DB/cache if record of installed version is recent enough
+        # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
+        return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)
+
+    def load_binary(self, installed_binary=None):
+        installed_binary = installed_binary or self.detect_installed_binary()
+        return installed_binary.load_from_db()
+
+    # def detect_network_interface(self):
+    #     from machine.models import NetworkInterface
+    #     return NetworkInterface.objects.current()
diff --git a/packages/abx-spec-extractor/pyproject.toml b/packages/abx-spec-extractor/pyproject.toml
new file mode 100644
index 00000000..5d49fef2
--- /dev/null
+++ b/packages/abx-spec-extractor/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-spec-extractor"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "python-benedict>=0.26.0",
+    "pydantic>=2.5.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_extractor = "abx_spec_extractor"
diff --git a/packages/abx-spec-pydantic-pkgr/README.md b/packages/abx-spec-pydantic-pkgr/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py b/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
new file mode 100644
index 00000000..4665452a
--- /dev/null
+++ b/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
@@ -0,0 +1,72 @@
+import os
+
+from typing import Dict
+from pathlib import Path
+
+import abx
+
+from pydantic_pkgr import Binary, BinProvider
+
+###########################################################################################
+
+@abx.hookspec
+@abx.hookimpl()
+def get_BINPROVIDERS() -> Dict[str, BinProvider]:
+    return {}
+
+@abx.hookspec
+@abx.hookimpl()
+def get_BINARIES() -> Dict[str, Binary]:
+    return {}
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def get_BINPROVIDER(binprovider_name: str) -> BinProvider:
+    return abx.as_dict(abx.pm.hook.get_BINPROVIDERS())[binprovider_name]
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def get_BINARY(bin_name: str) -> Binary:
+    return abx.as_dict(abx.pm.hook.get_BINARIES())[bin_name]
+
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def binary_load(binary: Binary, **kwargs) -> Binary:
+    loaded_binary = binary.load(**kwargs)
+    abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+    return loaded_binary
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def binary_install(binary: Binary, **kwargs) -> Binary:
+    loaded_binary = binary.install(**kwargs)
+    abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+    return loaded_binary
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def binary_load_or_install(binary: Binary, **kwargs) -> Binary:
+    loaded_binary = binary.load_or_install(**kwargs)
+    abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+    return loaded_binary
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl
+def binary_symlink_to_bin_dir(binary: Binary, bin_dir: Path | None=None): + LIB_DIR = Path(abx.pm.hook.get_CONFIG().get('LIB_DIR', '/usr/local/share/abx')) + BIN_DIR = bin_dir or Path(abx.pm.hook.get_CONFIG().get('BIN_DIR', LIB_DIR / 'bin')) + + if not (binary.abspath and os.path.isfile(binary.abspath)): + return + + try: + BIN_DIR.mkdir(parents=True, exist_ok=True) + symlink = BIN_DIR / binary.name + symlink.unlink(missing_ok=True) + symlink.symlink_to(binary.abspath) + symlink.chmod(0o777) # make sure its executable by everyone + except Exception: + # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') + # not actually needed, we can just run without it + pass diff --git a/packages/abx-spec-pydantic-pkgr/pyproject.toml b/packages/abx-spec-pydantic-pkgr/pyproject.toml new file mode 100644 index 00000000..67f1f62f --- /dev/null +++ b/packages/abx-spec-pydantic-pkgr/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-spec-pydantic-pkgr" +version = "0.1.0" +description = "The ABX plugin specification for Binaries and BinProviders" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_pydantic_pkgr = "abx_spec_pydantic_pkgr" diff --git a/packages/abx-spec-searchbackend/README.md b/packages/abx-spec-searchbackend/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/abx/archivebox/base_searchbackend.py b/packages/abx-spec-searchbackend/abx_spec_searchbackend.py similarity index 73% rename from archivebox/abx/archivebox/base_searchbackend.py rename to packages/abx-spec-searchbackend/abx_spec_searchbackend.py index 72713ab8..66b34114 100644 --- a/archivebox/abx/archivebox/base_searchbackend.py +++ b/packages/abx-spec-searchbackend/abx_spec_searchbackend.py @@ -1,8 +1,12 @@ -__package__ = 'abx.archivebox' - -from typing import Iterable, List import abc +from typing import Iterable, List, Dict +import abx + +@abx.hookspec +@abx.hookimpl +def get_SEARCHBACKENDS() -> Dict[abx.PluginId, 'BaseSearchBackend']: + return {} class BaseSearchBackend(abc.ABC): diff --git a/packages/abx-spec-searchbackend/pyproject.toml b/packages/abx-spec-searchbackend/pyproject.toml new file mode 100644 index 00000000..2a9ac3ce --- /dev/null +++ b/packages/abx-spec-searchbackend/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-spec-searchbackend" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "python-benedict>=0.26.0", + "pydantic>=2.5.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_searchbackend = "abx_spec_searchbackend" diff --git a/packages/abx/README.md b/packages/abx/README.md new file mode 100644 index 00000000..e69de29b diff --git a/packages/abx/abx.py b/packages/abx/abx.py new file mode 100644 index 00000000..0ce28462 --- /dev/null +++ b/packages/abx/abx.py @@ -0,0 +1,344 @@ +__package__ = 'abx' +__id__ = 'abx' +__label__ = 'ABX' +__author__ = 'Nick Sweeting' +__homepage__ = 'https://github.com/ArchiveBox' +__order__ = 0 + + +import sys +import inspect +import importlib +import itertools +from pathlib import Path +from typing import Dict, Callable, List, Set, Tuple, Iterable, Any, TypedDict, Type, cast +from types import ModuleType +from typing_extensions import Annotated +from functools import 
cache + +from benedict import benedict +from pydantic import AfterValidator + +from pluggy import HookspecMarker, HookimplMarker, PluginManager, HookimplOpts + +spec = hookspec = HookspecMarker("abx") +impl = hookimpl = HookimplMarker("abx") + + + +AttrName = Annotated[str, AfterValidator(lambda x: x.isidentifier() and not x.startswith('_'))] +PluginId = Annotated[str, AfterValidator(lambda x: x.isidentifier() and not x.startswith('_') and x.islower())] + +class PluginInfo(TypedDict, total=False): + id: PluginId + package: AttrName + label: str + version: str + author: str + homepage: str + dependencies: List[str] + + source_code: str + hooks: Dict[AttrName, Callable] + module: ModuleType + + + +class PatchedPluginManager(PluginManager): + """ + Patch to fix pluggy's PluginManager to work with pydantic models. + See: https://github.com/pytest-dev/pluggy/pull/536 + """ + def parse_hookimpl_opts(self, plugin, name: str) -> HookimplOpts | None: + # IMPORTANT: @property methods can have side effects, and are never hookimpl + # if attr is a property, skip it in advance + plugin_class = plugin if inspect.isclass(plugin) else type(plugin) + if isinstance(getattr(plugin_class, name, None), property): + return None + + # pydantic model fields are like attrs and also can never be hookimpls + plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__") + if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}): + # pydantic models mess with the class and attr __signature__ + # so inspect.isroutine(...) throws exceptions and cant be used + return None + + try: + return super().parse_hookimpl_opts(plugin, name) + except AttributeError: + return super().parse_hookimpl_opts(type(plugin), name) + +pm = PatchedPluginManager("abx") + + + +@hookspec(firstresult=True) +@hookimpl +@cache +def get_PLUGIN_ORDER(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int, Path]: + plugin_dir = None + plugin_module = None + + if isinstance(plugin, str) or isinstance(plugin, Path): + if str(plugin).endswith('.py'): + plugin_dir = Path(plugin).parent + plugin_id = plugin_dir.name + elif '/' in str(plugin): + # assume it's a path to a plugin directory + plugin_dir = Path(plugin) + plugin_id = plugin_dir.name + elif str(plugin).isidentifier(): + # assume it's a plugin_id + plugin_id = str(plugin) + + elif inspect.ismodule(plugin) or inspect.isclass(plugin): + plugin_module = plugin + plugin_dir = Path(str(plugin_module.__file__)).parent + plugin_id = plugin_dir.name + else: + raise ValueError(f'Invalid plugin, cannot get order: {plugin}') + + if plugin_dir: + try: + # if .plugin_order file exists, use it to set the load priority + order = int((plugin_dir / '.plugin_order').read_text()) + return (order, plugin_dir) + except FileNotFoundError: + pass + + if not plugin_module: + try: + plugin_module = importlib.import_module(plugin_id) + except ImportError: + raise ValueError(f'Invalid plugin, cannot get order: {plugin}') + + if plugin_module and not plugin_dir: + plugin_dir = Path(str(plugin_module.__file__)).parent + + assert plugin_dir + + return (getattr(plugin_module, '__order__', 999), plugin_dir) + +# @hookspec +# @hookimpl +# def get_PLUGIN() -> Dict[PluginId, PluginInfo]: +# """Get the info for a single plugin, implemented by each plugin""" +# return { +# __id__: PluginInfo({ +# 'id': __id__, +# 'package': str(__package__), +# 'label': __id__, +# 'version': __version__, +# 'author': __author__, +# 'homepage': __homepage__, +# 'dependencies': __dependencies__, +# }), +# } + 
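+# Illustrative example (not a real plugin in this repo): a plugin package can advertise
+# its metadata as module-level dunder attrs plus an optional .plugin_order file, which
+# get_PLUGIN_ORDER() above and get_PLUGIN_METADATA() below pick up and merge with the
+# plugin's pyproject.toml [project] table:
+#
+#   # packages/abx-plugin-example/abx_plugin_example/__init__.py   (hypothetical plugin)
+#   __id__ = 'abx_plugin_example'
+#   __label__ = 'Example Plugin'
+#   __version__ = '0.1.0'
+#   __author__ = 'Example Author'
+#   __homepage__ = 'https://github.com/ArchiveBox'
+#   __dependencies__ = ['abx>=0.1.0']
+#   __order__ = 100    # or write "100" into a .plugin_order file next to this __init__.py
+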
+@hookspec(firstresult=True)
+@hookimpl
+@cache
+def get_PLUGIN_METADATA(plugin: PluginId | ModuleType | Type) -> PluginInfo:
+    # TODO: remove get_PLUGIN hook in favor of pyproject.toml and __attr__s metadata
+    # having three methods to detect plugin metadata is overkill
+
+    assert plugin
+
+    # import the plugin module by its name
+    if isinstance(plugin, str):
+        module = importlib.import_module(plugin)
+        plugin_id = plugin
+    elif inspect.ismodule(plugin) or inspect.isclass(plugin):
+        module = plugin
+        plugin_id = plugin.__package__
+    else:
+        raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
+
+    assert module.__file__
+
+    # load the plugin info from the plugin/__init__.py __attr__s if they exist
+    plugin_module_attrs = {
+        'id': getattr(module, '__id__', plugin_id),
+        'name': getattr(module, '__id__', plugin_id),
+        'label': getattr(module, '__label__', plugin_id),
+        'version': getattr(module, '__version__', '0.0.1'),
+        'author': getattr(module, '__author__', 'Unknown'),
+        'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox'),
+        'dependencies': getattr(module, '__dependencies__', []),
+    }
+
+    # load the plugin info from the plugin.get_PLUGIN() hook method if it has one
+    plugin_info_dict = {}
+    if hasattr(module, 'get_PLUGIN'):
+        plugin_info_dict = {
+            key.lower(): value
+            for key, value in module.get_PLUGIN().items()
+        }
+
+    # load the plugin info from the plugin/pyproject.toml file if it has one
+    plugin_toml_info = {}
+    try:
+        # try loading ./pyproject.toml first in case the plugin is a bare python file not inside a package dir
+        plugin_toml_info = benedict.from_toml((Path(module.__file__).parent / 'pyproject.toml').read_text()).project
+    except Exception:
+        try:
+            # try loading ../pyproject.toml next in case the plugin is in a package dir
+            plugin_toml_info = benedict.from_toml((Path(module.__file__).parent.parent / 'pyproject.toml').read_text()).project
+        except Exception as e:
+            print('WARNING: could not detect pyproject.toml for PLUGIN:', plugin_id, Path(module.__file__).parent, 'ERROR:', e)
+
+    # merge the plugin info from all sources + add dynamically calculated info
+    return cast(PluginInfo, benedict(PluginInfo(**{
+        'id': plugin_id,
+        **plugin_module_attrs,
+        **plugin_info_dict,
+        **plugin_toml_info,
+        'package': module.__package__,
+        'module': module,
+        'order': pm.hook.get_PLUGIN_ORDER(plugin=module),
+        'source_code': module.__file__,
+        'hooks': get_plugin_hooks(module),
+    })))
+
+@hookspec(firstresult=True)
+@hookimpl
+def get_ALL_PLUGINS() -> Dict[PluginId, PluginInfo]:
+    """Get a flat dictionary of all plugins {plugin_id: {...plugin_metadata}}"""
+    return as_dict(pm.hook.get_PLUGIN())
+
+
+@hookspec(firstresult=True)
+@hookimpl
+def get_ALL_PLUGINS_METADATA() -> Dict[PluginId, PluginInfo]:
+    """Get the metadata for all the plugins registered with Pluggy."""
+    plugins = {}
+    for plugin_module in pm.get_plugins():
+        plugin_info = pm.hook.get_PLUGIN_METADATA(plugin=plugin_module)
+        assert 'id' in plugin_info
+        plugins[plugin_info['id']] = plugin_info
+    return benedict(plugins)
+
+@hookspec(firstresult=True)
+@hookimpl
+def get_ALL_PLUGIN_HOOK_NAMES() -> Set[str]:
+    """Get a set of all hook names across all plugins"""
+    return {
+        hook_name
+        for plugin_module in pm.get_plugins()
+        for hook_name in get_plugin_hooks(plugin_module)
+    }
+
+pm.add_hookspecs(sys.modules[__name__])
+pm.register(sys.modules[__name__])
+
+
+###### PLUGIN DISCOVERY AND LOADING ########################################################
+
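+# Typical startup flow (illustrative sketch only; the user plugins dir below is
+# hypothetical, everything else is defined in this module or elsewhere in this repo):
+#
+#   from pathlib import Path
+#   import abx
+#
+#   pip_plugins = abx.get_pip_installed_plugins(group='abx')              # {plugin_id: dir}
+#   user_plugins = abx.find_plugins_in_dir(Path('/data/user_plugins'))    # hypothetical dir
+#   abx.register_hookspecs(['abx_spec_pydantic_pkgr', 'abx_spec_extractor'])
+#   LOADED_PLUGINS = abx.load_plugins({**pip_plugins, **user_plugins})
+#   abx.pm.hook.get_ALL_PLUGINS()
+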
+ + +def register_hookspecs(plugin_ids: Iterable[PluginId]): + """ + Register all the hookspecs from a list of module names. + """ + for plugin_id in plugin_ids: + hookspec_module = importlib.import_module(plugin_id) + pm.add_hookspecs(hookspec_module) + + +def find_plugins_in_dir(plugins_dir: Path) -> Dict[PluginId, Path]: + """ + Find all the plugins in a given directory. Just looks for an __init__.py file. + """ + return { + plugin_entrypoint.parent.name: plugin_entrypoint.parent + for plugin_entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=pm.hook.get_PLUGIN_ORDER) # type:ignore + if plugin_entrypoint.parent.name != 'abx' + } # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip" + + +def get_pip_installed_plugins(group: PluginId='abx') -> Dict[PluginId, Path]: + """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip""" + import importlib.metadata + + DETECTED_PLUGINS = {} # module_name: module_dir_path + for dist in list(importlib.metadata.distributions()): + for entrypoint in dist.entry_points: + if entrypoint.group != group or pm.is_blocked(entrypoint.name): + continue + DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent + # pm.register(plugin, name=ep.name) + # pm._plugin_distinfo.append((plugin, DistFacade(dist))) + return DETECTED_PLUGINS + + + +# Load all plugins from pip packages, archivebox built-ins, and user plugins +def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId, Path]): + """ + Load all the plugins from a dictionary of module names and directory paths. + """ + LOADED_PLUGINS = {} + for plugin in plugins: + plugin_info = pm.hook.get_PLUGIN_METADATA(plugin=plugin) + assert 'id' in plugin_info and 'module' in plugin_info + if plugin_info['module'] in pm.get_plugins(): + LOADED_PLUGINS[plugin_info['id']] = plugin_info + continue + try: + pm.add_hookspecs(plugin_info['module']) + except ValueError: + # not all plugins register new hookspecs, some only have hookimpls + pass + pm.register(plugin_info['module']) + LOADED_PLUGINS[plugin_info['id']] = plugin_info + # print(f' √ Loaded plugin: {plugin_id}') + return benedict(LOADED_PLUGINS) + +@cache +def get_plugin_hooks(plugin: PluginId | ModuleType | Type | None) -> Dict[AttrName, Callable]: + """Get all the functions marked with @hookimpl on a module.""" + if not plugin: + return {} + + hooks = {} + + if isinstance(plugin, str): + plugin_module = importlib.import_module(plugin) + elif inspect.ismodule(plugin) or inspect.isclass(plugin): + plugin_module = plugin + else: + raise ValueError(f'Invalid plugin, cannot get hooks: {plugin}') + + for attr_name in dir(plugin_module): + if attr_name.startswith('_'): + continue + try: + attr = getattr(plugin_module, attr_name) + if isinstance(attr, Callable): + if pm.parse_hookimpl_opts(plugin_module, attr_name): + hooks[attr_name] = attr + except Exception as e: + print(f'Error getting hookimpls for {plugin}: {e}') + + return hooks + + +def as_list(results) -> List[Any]: + """Flatten a list of lists returned by a pm.hook.call() into a single list""" + return list(itertools.chain(*results)) + + +def as_dict(results: Dict[str, Dict[PluginId, Any]] | List[Dict[PluginId, Any]]) -> Dict[PluginId, Any]: + """Flatten a list of dicts returned by a pm.hook.call() into a single dict""" + if isinstance(results, (dict, benedict)): + results_list = results.values() + else: + results_list = results + + return benedict({ + result_id: result + for plugin_results in results_list + for 
result_id, result in dict(plugin_results).items() + }) + + diff --git a/packages/abx/pyproject.toml b/packages/abx/pyproject.toml new file mode 100644 index 00000000..3c185653 --- /dev/null +++ b/packages/abx/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "abx" +version = "0.1.0" +description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem." +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "pluggy>=1.5.0", + "django>=5.1.1,<6.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/packages/archivebox-pocket/.circleci/config.yml b/packages/archivebox-pocket/.circleci/config.yml new file mode 100644 index 00000000..a20a6aae --- /dev/null +++ b/packages/archivebox-pocket/.circleci/config.yml @@ -0,0 +1,61 @@ +version: 2.1 +orbs: + python: circleci/python@2.0.3 + +jobs: + build_and_test_3_7: + docker: + - image: circleci/python:3.7 + executor: python/default + steps: + - checkout + - python/install-packages: + pkg-manager: pip + - run: + name: Run tests + command: nosetests + + build_and_test_3_8: + docker: + - image: circleci/python:3.8 + executor: python/default + steps: + - checkout + - python/install-packages: + pkg-manager: pip + - run: + name: Run tests + command: nosetests + + build_and_test_3_9: + docker: + - image: circleci/python:3.9 + executor: python/default + steps: + - checkout + - python/install-packages: + pkg-manager: pip + - run: + name: Run tests + command: nosetests + + build_and_test_3_10: + docker: + - image: circleci/python:3.10 + executor: python/default + steps: + - checkout + - python/install-packages: + pkg-manager: pip + - run: + name: Run tests + command: nosetests + + +workflows: + test_pocket: + jobs: + - build_and_test_3_7 + - build_and_test_3_8 + - build_and_test_3_9 + - build_and_test_3_10 diff --git a/packages/archivebox-pocket/.gitignore b/packages/archivebox-pocket/.gitignore new file mode 100644 index 00000000..8acafa3c --- /dev/null +++ b/packages/archivebox-pocket/.gitignore @@ -0,0 +1,43 @@ +*.py[co] + +# Packages +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +.pypirc + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox + +#Translations +*.mo + +#Mr Developer +.mr.developer.cfg + +# Virtualenv +include/ +lib/ +local/ +.Python + +# ViM files +.*.swp +.*.swo + +# Misc +*.log +*.pid +*.sql diff --git a/packages/archivebox-pocket/LICENSE.md b/packages/archivebox-pocket/LICENSE.md new file mode 100644 index 00000000..3b145165 --- /dev/null +++ b/packages/archivebox-pocket/LICENSE.md @@ -0,0 +1,27 @@ +Copyright (c) 2014, Tapan Pandita +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +* Neither the name of pocket nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/packages/archivebox-pocket/MANIFEST.in b/packages/archivebox-pocket/MANIFEST.in new file mode 100644 index 00000000..7425f8e8 --- /dev/null +++ b/packages/archivebox-pocket/MANIFEST.in @@ -0,0 +1,2 @@ +include LICENSE.md +include README.md diff --git a/packages/archivebox-pocket/README.md b/packages/archivebox-pocket/README.md new file mode 100644 index 00000000..6b2430be --- /dev/null +++ b/packages/archivebox-pocket/README.md @@ -0,0 +1,66 @@ +Pocket +====== +[![CircleCI](https://img.shields.io/circleci/build/github/tapanpandita/pocket/master?logo=CircleCI)](https://circleci.com/gh/tapanpandita/pocket) +[![Pypi](https://img.shields.io/pypi/v/pocket.svg)](https://pypi.python.org/pypi/pocket) +[![PyPI - Downloads](https://img.shields.io/pypi/dm/pocket.svg)](https://pypi.python.org/pypi/pocket) +![GitHub](https://img.shields.io/github/license/tapanpandita/pocket.svg) + + +A python wrapper for the [pocket api](http://getpocket.com/api/docs). + +Installation +------------ +``` +pip install pocket +``` + +Usage +------ + +You'll need your pocket consumer key. You can find this from your account page. +You will also need the access token for the account you want to modify. +Then, you need to create an instance of the pocket object + +```python +import pocket + +pocket_instance = pocket.Pocket(consumer_key, access_token) +``` + +### Chaining Modify Methods + +All the modify methods can be chained together for creating one bulk query. If you don't wish to chain the methods, just pass `wait=False`. + +```python +import pocket + +pocket_instance = pocket.Pocket(consumer_key, access_token) + +# perfoms all these actions in one request +# NOTE: Each individual method returns the instance itself. The response +# dictionary is not returned till commit is called on the instance. +response, headers = pocket_instance.archive(item_id1).archive(item_id2).favorite(item_id3).delete(item_id4).commit() + +# performs action immediately and returns a dictionary +pocket_instance.archive(item_id1, wait=False) +``` + +### OAUTH + +To get request token, use the get_request_token class method. To get the access token use the get_access_token method. + +```python +from pocket import Pocket + +request_token = Pocket.get_request_token(consumer_key=consumer_key, redirect_uri=redirect_uri) + +# URL to redirect user to, to authorize your app +auth_url = Pocket.get_auth_url(code=request_token, redirect_uri=redirect_uri) +# e.g. 
import subprocess; subprocess.run(['xdg-open', auth_url]) + +user_credentials = Pocket.get_credentials(consumer_key=consumer_key, code=request_token) + +access_token = user_credentials['access_token'] +``` + +For detailed documentation of the methods available, please visit the official [pocket api documentation](http://getpocket.com/api/docs). diff --git a/packages/archivebox-pocket/pocket.py b/packages/archivebox-pocket/pocket.py new file mode 100644 index 00000000..b5b8d2fa --- /dev/null +++ b/packages/archivebox-pocket/pocket.py @@ -0,0 +1,366 @@ +import requests +import json +from functools import wraps + + +class PocketException(Exception): + ''' + Base class for all pocket exceptions + http://getpocket.com/developer/docs/errors + + ''' + pass + + +class InvalidQueryException(PocketException): + pass + + +class AuthException(PocketException): + pass + + +class RateLimitException(PocketException): + ''' + http://getpocket.com/developer/docs/rate-limits + + ''' + pass + + +class ServerMaintenanceException(PocketException): + pass + +EXCEPTIONS = { + 400: InvalidQueryException, + 401: AuthException, + 403: RateLimitException, + 503: ServerMaintenanceException, +} + + +def method_wrapper(fn): + + @wraps(fn) + def wrapped(self, *args, **kwargs): + arg_names = list(fn.__code__.co_varnames) + arg_names.remove('self') + kwargs.update(dict(zip(arg_names, args))) + + url = self.api_endpoints[fn.__name__] + payload = dict([ + (k, v) for k, v in kwargs.items() + if v is not None + ]) + payload.update(self.get_payload()) + + return self.make_request(url, payload) + + return wrapped + + +def bulk_wrapper(fn): + + @wraps(fn) + def wrapped(self, *args, **kwargs): + arg_names = list(fn.__code__.co_varnames) + arg_names.remove('self') + kwargs.update(dict(zip(arg_names, args))) + + wait = kwargs.get('wait', True) + query = dict( + [(k, v) for k, v in kwargs.items() if v is not None] + ) + # TODO: Fix this hack + query['action'] = 'add' if fn.__name__ == 'bulk_add' else fn.__name__ + + if wait: + self.add_bulk_query(query) + return self + else: + url = self.api_endpoints['send'] + payload = { + 'actions': [query], + } + payload.update(self.get_payload()) + return self.make_request( + url, + json.dumps(payload), + headers={'content-type': 'application/json'}, + ) + + return wrapped + + +class Pocket(object): + ''' + This class implements a basic python wrapper around the pocket api. 
For a + detailed documentation of the methods and what they do please refer the + official pocket api documentation at + http://getpocket.com/developer/docs/overview + + ''' + api_endpoints = dict( + (method, 'https://getpocket.com/v3/%s' % method) + for method in "add,send,get".split(",") + ) + + statuses = { + 200: 'Request was successful', + 400: 'Invalid request, please make sure you follow the ' + 'documentation for proper syntax', + 401: 'Problem authenticating the user', + 403: 'User was authenticated, but access denied due to lack of ' + 'permission or rate limiting', + 503: 'Pocket\'s sync server is down for scheduled maintenance.', + } + + def __init__(self, consumer_key, access_token): + self.consumer_key = consumer_key + self.access_token = access_token + self._bulk_query = [] + + self._payload = { + 'consumer_key': self.consumer_key, + 'access_token': self.access_token, + } + + def get_payload(self): + return self._payload + + def add_bulk_query(self, query): + self._bulk_query.append(query) + + @staticmethod + def _post_request(url, payload, headers): + r = requests.post(url, data=payload, headers=headers) + return r + + @classmethod + def _make_request(cls, url, payload, headers=None): + r = cls._post_request(url, payload, headers) + + if r.status_code > 399: + error_msg = cls.statuses.get(r.status_code) + extra_info = r.headers.get('X-Error') + raise EXCEPTIONS.get(r.status_code, PocketException)( + '%s. %s' % (error_msg, extra_info) + ) + + return r.json() or r.text, r.headers + + @classmethod + def make_request(cls, url, payload, headers=None): + return cls._make_request(url, payload, headers) + + @method_wrapper + def add(self, url, title=None, tags=None, tweet_id=None): + ''' + This method allows you to add a page to a user's list. + In order to use the /v3/add endpoint, your consumer key must have the + "Add" permission. + http://getpocket.com/developer/docs/v3/add + + ''' + + @method_wrapper + def get( + self, state=None, favorite=None, tag=None, contentType=None, + sort=None, detailType=None, search=None, domain=None, since=None, + count=None, offset=None + ): + ''' + This method allows you to retrieve a user's list. It supports + retrieving items changed since a specific time to allow for syncing. + http://getpocket.com/developer/docs/v3/retrieve + + ''' + + @method_wrapper + def send(self, actions): + ''' + This method allows you to make changes to a user's list. It supports + adding new pages, marking pages as read, changing titles, or updating + tags. Multiple changes to items can be made in one request. 
+ http://getpocket.com/developer/docs/v3/modify + + ''' + + @bulk_wrapper + def bulk_add( + self, item_id, ref_id=None, tags=None, time=None, title=None, + url=None, wait=True + ): + ''' + Add a new item to the user's list + http://getpocket.com/developer/docs/v3/modify#action_add + + ''' + + @bulk_wrapper + def archive(self, item_id, time=None, wait=True): + ''' + Move an item to the user's archive + http://getpocket.com/developer/docs/v3/modify#action_archive + + ''' + + @bulk_wrapper + def readd(self, item_id, time=None, wait=True): + ''' + Re-add (unarchive) an item to the user's list + http://getpocket.com/developer/docs/v3/modify#action_readd + + ''' + + @bulk_wrapper + def favorite(self, item_id, time=None, wait=True): + ''' + Mark an item as a favorite + http://getpocket.com/developer/docs/v3/modify#action_favorite + + ''' + + @bulk_wrapper + def unfavorite(self, item_id, time=None, wait=True): + ''' + Remove an item from the user's favorites + http://getpocket.com/developer/docs/v3/modify#action_unfavorite + + ''' + + @bulk_wrapper + def delete(self, item_id, time=None, wait=True): + ''' + Permanently remove an item from the user's account + http://getpocket.com/developer/docs/v3/modify#action_delete + + ''' + + @bulk_wrapper + def tags_add(self, item_id, tags, time=None, wait=True): + ''' + Add one or more tags to an item + http://getpocket.com/developer/docs/v3/modify#action_tags_add + + ''' + + @bulk_wrapper + def tags_remove(self, item_id, tags, time=None, wait=True): + ''' + Remove one or more tags from an item + http://getpocket.com/developer/docs/v3/modify#action_tags_remove + + ''' + + @bulk_wrapper + def tags_replace(self, item_id, tags, time=None, wait=True): + ''' + Replace all of the tags for an item with one or more provided tags + http://getpocket.com/developer/docs/v3/modify#action_tags_replace + + ''' + + @bulk_wrapper + def tags_clear(self, item_id, time=None, wait=True): + ''' + Remove all tags from an item. + http://getpocket.com/developer/docs/v3/modify#action_tags_clear + + ''' + + @bulk_wrapper + def tag_rename(self, item_id, old_tag, new_tag, time=None, wait=True): + ''' + Rename a tag. This affects all items with this tag. 
+ http://getpocket.com/developer/docs/v3/modify#action_tag_rename + + ''' + + def commit(self): + ''' + This method executes the bulk query, flushes stored queries and + returns the response + + ''' + url = self.api_endpoints['send'] + payload = { + 'actions': self._bulk_query, + } + payload.update(self._payload) + self._bulk_query = [] + + return self._make_request( + url, + json.dumps(payload), + headers={'content-type': 'application/json'}, + ) + + @classmethod + def get_request_token( + cls, consumer_key, redirect_uri='http://example.com/', state=None + ): + ''' + Returns the request token that can be used to fetch the access token + + ''' + headers = { + 'X-Accept': 'application/json', + } + url = 'https://getpocket.com/v3/oauth/request' + payload = { + 'consumer_key': consumer_key, + 'redirect_uri': redirect_uri, + } + + if state: + payload['state'] = state + + return cls._make_request(url, payload, headers)[0]['code'] + + @classmethod + def get_credentials(cls, consumer_key, code): + ''' + Fetches access token from using the request token and consumer key + + ''' + headers = { + 'X-Accept': 'application/json', + } + url = 'https://getpocket.com/v3/oauth/authorize' + payload = { + 'consumer_key': consumer_key, + 'code': code, + } + + return cls._make_request(url, payload, headers)[0] + + @classmethod + def get_access_token(cls, consumer_key, code): + return cls.get_credentials(consumer_key, code)['access_token'] + + @classmethod + def get_auth_url(cls, code, redirect_uri='http://example.com'): + auth_url = ('https://getpocket.com/auth/authorize' + '?request_token=%s&redirect_uri=%s' % (code, redirect_uri)) + return auth_url + + @classmethod + def auth( + cls, consumer_key, redirect_uri='http://example.com/', state=None, + ): + ''' + This is a test method for verifying if oauth worked + http://getpocket.com/developer/docs/authentication + + ''' + code = cls.get_request_token(consumer_key, redirect_uri, state) + + auth_url = 'https://getpocket.com/auth/authorize?request_token='\ + '%s&redirect_uri=%s' % (code, redirect_uri) + raw_input( + 'Please open %s in your browser to authorize the app and ' + 'press enter:' % auth_url + ) + + return cls.get_access_token(consumer_key, code) diff --git a/packages/archivebox-pocket/pyproject.toml b/packages/archivebox-pocket/pyproject.toml new file mode 100644 index 00000000..6acf8a57 --- /dev/null +++ b/packages/archivebox-pocket/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "archivebox-pocket" +version = "0.3.7" +description = " api wrapper for getpocket.com" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "requests>=2.32.3", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.sdist] +packages = ["."] + +[tool.hatch.build.targets.wheel] +packages = ["."] diff --git a/packages/archivebox-pocket/requirements.txt b/packages/archivebox-pocket/requirements.txt new file mode 100644 index 00000000..9598beea --- /dev/null +++ b/packages/archivebox-pocket/requirements.txt @@ -0,0 +1,4 @@ +coverage==3.7.1 +mock==1.0.1 +nose==1.3.0 +requests==2.20.0 diff --git a/packages/archivebox-pocket/setup.py b/packages/archivebox-pocket/setup.py new file mode 100644 index 00000000..5a5baba0 --- /dev/null +++ b/packages/archivebox-pocket/setup.py @@ -0,0 +1,41 @@ +from setuptools import setup + +setup( + name = "pocket", # pip install pocket + description = "api wrapper for getpocket.com", + #long_description=open('README.md', 'rt').read(), + + # version + # third part for minor 
release + # second when api changes + # first when it becomes stable someday + version = "0.3.7", + author = 'Tapan Pandita', + author_email = "tapan.pandita@gmail.com", + + url = 'http://github.com/tapanpandita/pocket/', + license = 'BSD', + + # as a practice no need to hard code version unless you know program wont + # work unless the specific versions are used + install_requires = ["requests>=2.32.3"], + + py_modules = ["pocket"], + + zip_safe = True, +) + +# TODO: Do all this and delete these lines +# register: Create an accnt on pypi, store your credentials in ~/.pypirc: +# +# [pypirc] +# servers = +# pypi +# +# [server-login] +# username: +# password: +# +# $ python setup.py register # one time only, will create pypi page for pocket +# $ python setup.py sdist --formats=gztar,zip upload # create a new release +# diff --git a/packages/archivebox-pocket/test_pocket.py b/packages/archivebox-pocket/test_pocket.py new file mode 100644 index 00000000..14e67f53 --- /dev/null +++ b/packages/archivebox-pocket/test_pocket.py @@ -0,0 +1,52 @@ +import unittest +import pocket +from mock import patch + + +class PocketTest(unittest.TestCase): + + def setUp(self): + self.consumer_key = 'consumer_key' + self.access_token = 'access_token' + + def tearDown(self): + pass + + def test_pocket_init(self): + pocket_instance = pocket.Pocket( + self.consumer_key, + self.access_token, + ) + + self.assertEqual(self.consumer_key, pocket_instance.consumer_key) + self.assertEqual(self.access_token, pocket_instance.access_token) + + def test_pocket_init_payload(self): + pocket_instance = pocket.Pocket( + self.consumer_key, + self.access_token, + ) + expected_payload = { + 'consumer_key': self.consumer_key, + 'access_token': self.access_token, + } + + self.assertEqual(expected_payload, pocket_instance._payload) + + def test_post_request(self): + mock_payload = { + 'consumer_key': self.consumer_key, + 'access_token': self.access_token, + } + mock_url = 'https://getpocket.com/v3/' + mock_headers = { + 'content-type': 'application/json', + } + + with patch('pocket.requests') as mock_requests: + pocket.Pocket._post_request(mock_url, mock_payload, mock_headers) + mock_requests.post.assert_called_once_with( + mock_url, + data=mock_payload, + headers=mock_headers, + ) diff --git a/packages/pydantic-pkgr b/packages/pydantic-pkgr new file mode 160000 index 00000000..a116eaef --- /dev/null +++ b/packages/pydantic-pkgr @@ -0,0 +1 @@ +Subproject commit a116eaef7f090dc872b18e82b5a538313075ded6 diff --git a/pyproject.toml b/pyproject.toml index c75f0641..de870ada 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "archivebox" -version = "0.8.5rc51" +version = "0.8.5rc53" requires-python = ">=3.10" description = "Self-hosted internet archiving solution." 
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}] @@ -46,6 +46,7 @@ dependencies = [ "django-ninja>=1.3.0", "django-extensions>=3.2.3", "mypy-extensions>=1.0.0", + "typing_extensions>=4.12.2", "channels[daphne]>=4.1.0", "django-signal-webhooks>=0.3.0", "django-admin-data-views>=0.4.1", @@ -80,6 +81,22 @@ dependencies = [ # "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", "pydantic-pkgr>=0.5.4", ############# Plugin Dependencies ################ + "abx>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-archivebox>=0.1.0", + "abx-spec-django>=0.1.0", + "abx-spec-extractor>=0.1.0", + "abx-spec-searchbackend>=0.1.0", + + "abx-plugin-default-binproviders>=2024.10.24", + "abx-plugin-pip-binprovider>=2024.10.24", + "abx-plugin-npm-binprovider>=2024.10.24", + "abx-plugin-playwright-binprovider>=2024.10.24", + + # "abx-plugin-pocket", + # "abx-plugin-sonic", + # "abx-plugin-yt-dlp", "sonic-client>=1.0.0", "yt-dlp>=2024.8.6", # for: media" ] @@ -104,14 +121,14 @@ all = [ [tool.uv] dev-dependencies = [ ### BUILD - "uv", + "uv>=0.4.26", "pip>=24.2", "setuptools>=75.1.0", "wheel>=0.44.0", "homebrew-pypi-poet>=0.10.0", # for: generating archivebox.rb brewfile list of python packages ### DOCS "recommonmark>=0.7.1", - "sphinx", + "sphinx>=8.1.3", "sphinx-rtd-theme>=2.0.0", ### DEBUGGING "django-debug-toolbar>=4.4.6", @@ -121,7 +138,7 @@ dev-dependencies = [ "logfire[django]>=0.51.0", "opentelemetry-instrumentation-django>=0.47b0", "opentelemetry-instrumentation-sqlite3>=0.47b0", - "viztracer", # usage: viztracer ../.venv/bin/archivebox manage check + "viztracer>=0.17.0", # usage: viztracer ../.venv/bin/archivebox manage check # "snakeviz", # usage: python -m cProfile -o flamegraph.prof ../.venv/bin/archivebox manage check ### TESTING "pytest>=8.3.3", @@ -133,6 +150,26 @@ dev-dependencies = [ "django-autotyping>=0.5.1", ] +[tool.uv.sources] +abx = { workspace = true } +abx-spec-pydantic-pkgr = { workspace = true } +abx-spec-config = { workspace = true } +abx-spec-archivebox = { workspace = true } +abx-spec-django = { workspace = true } +abx-spec-extractor = { workspace = true } +abx-spec-searchbackend = { workspace = true } + +abx-plugin-default-binproviders = { workspace = true } +abx-plugin-pip-binprovider = { workspace = true } +abx-plugin-npm-binprovider = { workspace = true } +abx-plugin-playwright-binprovider = { workspace = true } + +pydantic-pkgr = { workspace = true } +archivebox-pocket = { workspace = true } + +[tool.uv.workspace] +members = ["packages/*"] + [build-system] requires = ["pdm-backend"] build-backend = "pdm.backend" diff --git a/requirements.txt b/requirements.txt index f9a37b4b..db2a66f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -166,7 +166,7 @@ parso==0.8.4 # via jedi pexpect==4.9.0 # via ipython -phonenumbers==8.13.47 +phonenumbers==8.13.48 # via python-benedict platformdirs==4.3.6 # via pydantic-pkgr @@ -250,7 +250,7 @@ requests==2.32.3 # archivebox (pyproject.toml) # python-benedict # yt-dlp -rich==13.9.2 +rich==13.9.3 # via # archivebox (pyproject.toml) # rich-argparse @@ -332,7 +332,7 @@ xlrd==2.0.1 # via python-benedict xmltodict==0.14.2 # via python-benedict -yt-dlp==2024.10.7 +yt-dlp==2024.10.22 # via archivebox (pyproject.toml) -zope-interface==7.1.0 +zope-interface==7.1.1 # via twisted diff --git a/uv.lock b/uv.lock index f320d661..e4d6e7e4 100644 --- a/uv.lock +++ b/uv.lock @@ -6,6 +6,329 @@ resolution-markers = [ "python_full_version >= '3.13'", ] +[manifest] +members = [ + "abx", + 
"abx-archivedotorg-extractor", + "abx-chrome-extractor", + "abx-curl-extractor", + "abx-favicon-extractor", + "abx-git-extractor", + "abx-htmltotext-extractor", + "abx-ldap-auth", + "abx-mercury-extractor", + "abx-plugin-default-binproviders", + "abx-plugin-npm-binprovider", + "abx-plugin-pip-binprovider", + "abx-plugin-playwright-binprovider", + "abx-pocket-extractor", + "abx-puppeteer-binprovider", + "abx-readability-extractor", + "abx-readwise-extractor", + "abx-ripgrep-search", + "abx-singlefile-extractor", + "abx-sonic-search", + "abx-spec-archivebox", + "abx-spec-config", + "abx-spec-django", + "abx-spec-extractor", + "abx-spec-pydantic-pkgr", + "abx-spec-searchbackend", + "abx-sqlitefts-search", + "abx-wget-extractor", + "abx-ytdlp-extractor", + "archivebox", + "archivebox-pocket", + "pydantic-pkgr", +] + +[[package]] +name = "abx" +version = "0.1.0" +source = { editable = "packages/abx" } +dependencies = [ + { name = "django" }, + { name = "pluggy" }, +] + +[package.metadata] +requires-dist = [ + { name = "django", specifier = ">=5.1.1,<6.0" }, + { name = "pluggy", specifier = ">=1.5.0" }, +] + +[[package]] +name = "abx-archivedotorg-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-archivedotorg-extractor" } + +[[package]] +name = "abx-chrome-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-chrome-extractor" } + +[[package]] +name = "abx-curl-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-curl-extractor" } + +[[package]] +name = "abx-favicon-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-favicon-extractor" } + +[[package]] +name = "abx-git-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-git-extractor" } + +[[package]] +name = "abx-htmltotext-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-htmltotext-extractor" } + +[[package]] +name = "abx-ldap-auth" +version = "0.1.0" +source = { editable = "packages/abx-plugin-ldap-auth" } + +[[package]] +name = "abx-mercury-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-mercury-extractor" } + +[[package]] +name = "abx-plugin-default-binproviders" +version = "2024.10.24" +source = { editable = "packages/abx-plugin-default-binproviders" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-npm-binprovider" +version = "2024.10.24" +source = { editable = "packages/abx-plugin-npm-binprovider" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-default-binproviders" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "abx-plugin-default-binproviders", editable = "packages/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "packages/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-pip-binprovider" +version = "2024.10.24" +source = { editable = "packages/abx-plugin-pip-binprovider" } +dependencies = [ + { 
name = "abx" }, + { name = "abx-plugin-default-binproviders" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "django" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "abx-plugin-default-binproviders", editable = "packages/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "packages/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, + { name = "django", specifier = ">=5.0.0" }, + { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-playwright-binprovider" +version = "2024.10.24" +source = { editable = "packages/abx-plugin-playwright-binprovider" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "abx-spec-config", editable = "packages/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, + { name = "pydantic", specifier = ">=2.4.2" }, + { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, +] + +[[package]] +name = "abx-pocket-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-pocket-extractor" } + +[[package]] +name = "abx-puppeteer-binprovider" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-puppeteer-binprovider" } + +[[package]] +name = "abx-readability-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-readability-extractor" } + +[[package]] +name = "abx-readwise-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-readwise-extractor" } + +[[package]] +name = "abx-ripgrep-search" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-ripgrep-search" } + +[[package]] +name = "abx-singlefile-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-singlefile-extractor" } + +[[package]] +name = "abx-sonic-search" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-sonic-search" } + +[[package]] +name = "abx-spec-archivebox" +version = "0.1.0" +source = { editable = "packages/abx-spec-archivebox" } +dependencies = [ + { name = "abx" }, + { name = "django" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "django", specifier = ">=5.1.1,<6.0" }, +] + +[[package]] +name = "abx-spec-config" +version = "0.0.1" +source = { editable = "packages/abx-spec-config" } +dependencies = [ + { name = "abx" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "python-benedict" }, + { name = "rich" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "pydantic", specifier = ">=2.9.2" }, + { name = "pydantic-settings", specifier = ">=2.6.0" }, + { name = "python-benedict", specifier = ">=0.34.0" }, + { name = "rich", specifier = ">=13.9.3" }, +] + +[[package]] +name = "abx-spec-django" +version = "0.1.0" +source = { editable = "packages/abx-spec-django" } +dependencies = [ + { name = "abx" }, + { name = "django" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "django", specifier = ">=5.1.1,<6.0" }, +] + +[[package]] +name = "abx-spec-extractor" +version = "0.1.0" +source = { editable = 
"packages/abx-spec-extractor" } +dependencies = [ + { name = "abx" }, + { name = "pydantic" }, + { name = "python-benedict" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "pydantic", specifier = ">=2.5.0" }, + { name = "python-benedict", specifier = ">=0.26.0" }, +] + +[[package]] +name = "abx-spec-pydantic-pkgr" +version = "0.1.0" +source = { editable = "packages/abx-spec-pydantic-pkgr" } +dependencies = [ + { name = "abx" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, +] + +[[package]] +name = "abx-spec-searchbackend" +version = "0.1.0" +source = { editable = "packages/abx-spec-searchbackend" } +dependencies = [ + { name = "abx" }, + { name = "pydantic" }, + { name = "python-benedict" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "pydantic", specifier = ">=2.5.0" }, + { name = "python-benedict", specifier = ">=0.26.0" }, +] + +[[package]] +name = "abx-sqlitefts-search" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-sqlitefts-search" } + +[[package]] +name = "abx-wget-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-wget-extractor" } + +[[package]] +name = "abx-ytdlp-extractor" +version = "0.1.0" +source = { virtual = "packages/abx-plugin-ytdlp-extractor" } + [[package]] name = "alabaster" version = "1.0.0" @@ -24,6 +347,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] +[[package]] +name = "ansible" +version = "10.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ansible-core" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d7/23/ae30b280ebad1f19fa012c0410aaf7d50cd741a5786bd60a2ecba42d2cd4/ansible-10.5.0.tar.gz", hash = "sha256:ba2045031a7d60c203b6e5fe1f8eaddd53ae076f7ada910e636494384135face", size = 40391062 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/33/4cb64286f44cd36753cd15ef636be6c9e40be331e14e97caca74cb7a3242/ansible-10.5.0-py3-none-any.whl", hash = "sha256:1d10bddba58f1edd0fe0b8e0387e0fafc519535066bb3c919c33b6ea3ec32a0f", size = 48977627 }, +] + +[[package]] +name = "ansible-core" +version = "2.17.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "resolvelib" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/96/02a6d1d16ef3b08d53e23db519fbb31641b2767404b674f3ea71c7c1ac3b/ansible_core-2.17.5.tar.gz", hash = "sha256:ae7f51fd13dc9d57c9bcd43ef23f9c255ca8f18f4b5c0011a4f9b724d92c5a8e", size = 3097858 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/4f/5c344dc52327766fb286771d492481c2c60eace9697497b250e1d79b1e40/ansible_core-2.17.5-py3-none-any.whl", hash = "sha256:10f165b475cf2bc8d886e532cadb32c52ee6a533649793101d3166bca9bd3ea3", size = 2193938 }, +] + +[[package]] +name = "ansible-runner" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "pexpect" }, + { name = "python-daemon" }, + { name = "pyyaml" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/e0/b4/842698d5c17b3cae7948df4c812e01f4199dfb9f35b1c0bb51cf2fe5c246/ansible-runner-2.4.0.tar.gz", hash = "sha256:82d02b2548830f37a53517b65c823c4af371069406c7d213b5c9041d45e0c5b6", size = 148802 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/58/46/44577e2e58de8b9c9398e1ee08b6c697bb2581446209cbfd2639cced66f5/ansible_runner-2.4.0-py3-none-any.whl", hash = "sha256:a3f592ae4cdfa62a72ad15de60da9c8210f376d67f495c4a78d4cf1dc7ccdf89", size = 79678 }, +] + [[package]] name = "anyio" version = "4.6.2.post1" @@ -41,9 +407,20 @@ wheels = [ [[package]] name = "archivebox" -version = "0.8.5rc50" +version = "0.8.5rc53" source = { editable = "." } dependencies = [ + { name = "abx" }, + { name = "abx-plugin-default-binproviders" }, + { name = "abx-plugin-npm-binprovider" }, + { name = "abx-plugin-pip-binprovider" }, + { name = "abx-plugin-playwright-binprovider" }, + { name = "abx-spec-archivebox" }, + { name = "abx-spec-config" }, + { name = "abx-spec-django" }, + { name = "abx-spec-extractor" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend" }, { name = "atomicwrites" }, { name = "base32-crockford" }, { name = "channels", extra = ["daphne"] }, @@ -79,6 +456,7 @@ dependencies = [ { name = "sonic-client" }, { name = "supervisor" }, { name = "typeid-python" }, + { name = "typing-extensions" }, { name = "ulid-py" }, { name = "w3lib" }, { name = "yt-dlp" }, @@ -122,6 +500,17 @@ dev = [ [package.metadata] requires-dist = [ + { name = "abx", editable = "packages/abx" }, + { name = "abx-plugin-default-binproviders", editable = "packages/abx-plugin-default-binproviders" }, + { name = "abx-plugin-npm-binprovider", editable = "packages/abx-plugin-npm-binprovider" }, + { name = "abx-plugin-pip-binprovider", editable = "packages/abx-plugin-pip-binprovider" }, + { name = "abx-plugin-playwright-binprovider", editable = "packages/abx-plugin-playwright-binprovider" }, + { name = "abx-spec-archivebox", editable = "packages/abx-spec-archivebox" }, + { name = "abx-spec-config", editable = "packages/abx-spec-config" }, + { name = "abx-spec-django", editable = "packages/abx-spec-django" }, + { name = "abx-spec-extractor", editable = "packages/abx-spec-extractor" }, + { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend", editable = "packages/abx-spec-searchbackend" }, { name = "archivebox", extras = ["sonic", "ldap"], marker = "extra == 'all'" }, { name = "atomicwrites", specifier = "==1.4.1" }, { name = "base32-crockford", specifier = "==0.3.0" }, @@ -148,7 +537,7 @@ requires-dist = [ { name = "pluggy", specifier = ">=1.5.0" }, { name = "psutil", specifier = ">=6.0.0" }, { name = "py-machineid", specifier = ">=0.6.0" }, - { name = "pydantic-pkgr", specifier = ">=0.5.4" }, + { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, { name = "pydantic-settings", specifier = ">=2.5.2" }, { name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" }, { name = "python-crontab", specifier = ">=3.2.0" }, @@ -160,6 +549,7 @@ requires-dist = [ { name = "sonic-client", specifier = ">=1.0.0" }, { name = "supervisor", specifier = ">=4.2.5" }, { name = "typeid-python", specifier = ">=0.3.1" }, + { name = "typing-extensions", specifier = ">=4.12.2" }, { name = "ulid-py", specifier = ">=1.1.0" }, { name = "w3lib", specifier = ">=2.2.1" }, { name = "yt-dlp", specifier = ">=2024.8.6" }, @@ -184,13 +574,24 @@ dev = [ { name = "requests-tracker", specifier = ">=0.3.3" }, { 
name = "ruff", specifier = ">=0.6.6" }, { name = "setuptools", specifier = ">=75.1.0" }, - { name = "sphinx" }, + { name = "sphinx", specifier = ">=8.1.3" }, { name = "sphinx-rtd-theme", specifier = ">=2.0.0" }, - { name = "uv" }, - { name = "viztracer" }, + { name = "uv", specifier = ">=0.4.26" }, + { name = "viztracer", specifier = ">=0.17.0" }, { name = "wheel", specifier = ">=0.44.0" }, ] +[[package]] +name = "archivebox-pocket" +version = "0.3.7" +source = { editable = "packages/archivebox-pocket" } +dependencies = [ + { name = "requests" }, +] + +[package.metadata] +requires-dist = [{ name = "requests", specifier = ">=2.32.3" }] + [[package]] name = "asgiref" version = "3.8.1" @@ -272,6 +673,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/6f/7ad1176c56c920e9841b14923f81545a4243876628312f143915561770d2/base32_crockford-0.3.0-py2.py3-none-any.whl", hash = "sha256:295ef5ffbf6ed96b6e739ffd36be98fa7e90a206dd18c39acefb15777eedfe6e", size = 5050 }, ] +[[package]] +name = "bcrypt" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/7e/d95e7d96d4828e965891af92e43b52a4cd3395dc1c1ef4ee62748d0471d0/bcrypt-4.2.0.tar.gz", hash = "sha256:cf69eaf5185fd58f268f805b505ce31f9b9fc2d64b376642164e9244540c1221", size = 24294 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/81/4e8f5bc0cd947e91fb720e1737371922854da47a94bc9630454e7b2845f8/bcrypt-4.2.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:096a15d26ed6ce37a14c1ac1e48119660f21b24cba457f160a4b830f3fe6b5cb", size = 471568 }, + { url = "https://files.pythonhosted.org/packages/05/d2/1be1e16aedec04bcf8d0156e01b987d16a2063d38e64c3f28030a3427d61/bcrypt-4.2.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c02d944ca89d9b1922ceb8a46460dd17df1ba37ab66feac4870f6862a1533c00", size = 277372 }, + { url = "https://files.pythonhosted.org/packages/e3/96/7a654027638ad9b7589effb6db77eb63eba64319dfeaf9c0f4ca953e5f76/bcrypt-4.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d84cf6d877918620b687b8fd1bf7781d11e8a0998f576c7aa939776b512b98d", size = 273488 }, + { url = "https://files.pythonhosted.org/packages/46/54/dc7b58abeb4a3d95bab653405935e27ba32f21b812d8ff38f271fb6f7f55/bcrypt-4.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1bb429fedbe0249465cdd85a58e8376f31bb315e484f16e68ca4c786dcc04291", size = 277759 }, + { url = "https://files.pythonhosted.org/packages/ac/be/da233c5f11fce3f8adec05e8e532b299b64833cc962f49331cdd0e614fa9/bcrypt-4.2.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:655ea221910bcac76ea08aaa76df427ef8625f92e55a8ee44fbf7753dbabb328", size = 273796 }, + { url = "https://files.pythonhosted.org/packages/b0/b8/8b4add88d55a263cf1c6b8cf66c735280954a04223fcd2880120cc767ac3/bcrypt-4.2.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:1ee38e858bf5d0287c39b7a1fc59eec64bbf880c7d504d3a06a96c16e14058e7", size = 311082 }, + { url = "https://files.pythonhosted.org/packages/7b/76/2aa660679abbdc7f8ee961552e4bb6415a81b303e55e9374533f22770203/bcrypt-4.2.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:0da52759f7f30e83f1e30a888d9163a81353ef224d82dc58eb5bb52efcabc399", size = 305912 }, + { url = "https://files.pythonhosted.org/packages/00/03/2af7c45034aba6002d4f2b728c1a385676b4eab7d764410e34fd768009f2/bcrypt-4.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3698393a1b1f1fd5714524193849d0c6d524d33523acca37cd28f02899285060", size = 325185 }, + { url = 
"https://files.pythonhosted.org/packages/dc/5d/6843443ce4ab3af40bddb6c7c085ed4a8418b3396f7a17e60e6d9888416c/bcrypt-4.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:762a2c5fb35f89606a9fde5e51392dad0cd1ab7ae64149a8b935fe8d79dd5ed7", size = 335188 }, + { url = "https://files.pythonhosted.org/packages/cb/4c/ff8ca83d816052fba36def1d24e97d9a85739b9bbf428c0d0ecd296a07c8/bcrypt-4.2.0-cp37-abi3-win32.whl", hash = "sha256:5a1e8aa9b28ae28020a3ac4b053117fb51c57a010b9f969603ed885f23841458", size = 156481 }, + { url = "https://files.pythonhosted.org/packages/65/f1/e09626c88a56cda488810fb29d5035f1662873777ed337880856b9d204ae/bcrypt-4.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:8f6ede91359e5df88d1f5c1ef47428a4420136f3ce97763e31b86dd8280fbdf5", size = 151336 }, + { url = "https://files.pythonhosted.org/packages/96/86/8c6a84daed4dd878fbab094400c9174c43d9b838ace077a2f8ee8bc3ae12/bcrypt-4.2.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:c52aac18ea1f4a4f65963ea4f9530c306b56ccd0c6f8c8da0c06976e34a6e841", size = 472414 }, + { url = "https://files.pythonhosted.org/packages/f6/05/e394515f4e23c17662e5aeb4d1859b11dc651be01a3bd03c2e919a155901/bcrypt-4.2.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bbbfb2734f0e4f37c5136130405332640a1e46e6b23e000eeff2ba8d005da68", size = 277599 }, + { url = "https://files.pythonhosted.org/packages/4b/3b/ad784eac415937c53da48983756105d267b91e56aa53ba8a1b2014b8d930/bcrypt-4.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3413bd60460f76097ee2e0a493ccebe4a7601918219c02f503984f0a7ee0aebe", size = 273491 }, + { url = "https://files.pythonhosted.org/packages/cc/14/b9ff8e0218bee95e517b70e91130effb4511e8827ac1ab00b4e30943a3f6/bcrypt-4.2.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8d7bb9c42801035e61c109c345a28ed7e84426ae4865511eb82e913df18f58c2", size = 277934 }, + { url = "https://files.pythonhosted.org/packages/3e/d0/31938bb697600a04864246acde4918c4190a938f891fd11883eaaf41327a/bcrypt-4.2.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3d3a6d28cb2305b43feac298774b997e372e56c7c7afd90a12b3dc49b189151c", size = 273804 }, + { url = "https://files.pythonhosted.org/packages/e7/c3/dae866739989e3f04ae304e1201932571708cb292a28b2f1b93283e2dcd8/bcrypt-4.2.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:9c1c4ad86351339c5f320ca372dfba6cb6beb25e8efc659bedd918d921956bae", size = 311275 }, + { url = "https://files.pythonhosted.org/packages/5d/2c/019bc2c63c6125ddf0483ee7d914a405860327767d437913942b476e9c9b/bcrypt-4.2.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:27fe0f57bb5573104b5a6de5e4153c60814c711b29364c10a75a54bb6d7ff48d", size = 306355 }, + { url = "https://files.pythonhosted.org/packages/75/fe/9e137727f122bbe29771d56afbf4e0dbc85968caa8957806f86404a5bfe1/bcrypt-4.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8ac68872c82f1add6a20bd489870c71b00ebacd2e9134a8aa3f98a0052ab4b0e", size = 325381 }, + { url = "https://files.pythonhosted.org/packages/1a/d4/586b9c18a327561ea4cd336ff4586cca1a7aa0f5ee04e23a8a8bb9ca64f1/bcrypt-4.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cb2a8ec2bc07d3553ccebf0746bbf3d19426d1c6d1adbd4fa48925f66af7b9e8", size = 335685 }, + { url = "https://files.pythonhosted.org/packages/24/55/1a7127faf4576138bb278b91e9c75307490178979d69c8e6e273f74b974f/bcrypt-4.2.0-cp39-abi3-win32.whl", hash = "sha256:77800b7147c9dc905db1cba26abe31e504d8247ac73580b4aa179f98e6608f34", size = 155857 }, + { url = 
"https://files.pythonhosted.org/packages/1c/2a/c74052e54162ec639266d91539cca7cbf3d1d3b8b36afbfeaee0ea6a1702/bcrypt-4.2.0-cp39-abi3-win_amd64.whl", hash = "sha256:61ed14326ee023917ecd093ee6ef422a72f3aec6f07e21ea5f10622b735538a9", size = 151717 }, + { url = "https://files.pythonhosted.org/packages/09/97/01026e7b1b7f8aeb41514408eca1137c0f8aef9938335e3bc713f82c282e/bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:39e1d30c7233cfc54f5c3f2c825156fe044efdd3e0b9d309512cc514a263ec2a", size = 275924 }, + { url = "https://files.pythonhosted.org/packages/ca/46/03eb26ea3e9c12ca18d1f3bf06199f7d72ce52e68f2a1ebcfd8acff9c472/bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f4f4acf526fcd1c34e7ce851147deedd4e26e6402369304220250598b26448db", size = 272242 }, +] + [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -561,6 +994,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 }, ] +[[package]] +name = "click" +version = "8.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "platform_system == 'Windows'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -579,6 +1024,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b1/92/dfd892312d822f36c55366118b95d914e5f16de11044a27cf10a7d71bbbf/commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9", size = 51068 }, ] +[[package]] +name = "configparser" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a5/2e/a8d83652990ecb5df54680baa0c53d182051d9e164a25baa0582363841d1/configparser-7.1.0.tar.gz", hash = "sha256:eb82646c892dbdf773dae19c633044d163c3129971ae09b49410a303b8e0a5f7", size = 50122 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/df/1514580907b0bac0970415e5e24ef96a9c1fa71dcf2aa0139045b58fae9a/configparser-7.1.0-py3-none-any.whl", hash = "sha256:98e374573c4e10e92399651e3ba1c47a438526d633c44ee96143dec26dad4299", size = 17074 }, +] + [[package]] name = "constantly" version = "23.10.4" @@ -684,6 +1138,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/8d/778b7d51b981a96554f29136cd59ca7880bf58094338085bcf2a979a0e6a/Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c", size = 9561 }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, +] + [[package]] name = "django" version = "5.1.2" @@ -1001,6 +1464,53 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/0f/d8a8152e720cbcad890e56ee98639ff489f1992869b4cf304c3fa24d4bcc/ftfy-6.3.0-py3-none-any.whl", hash = "sha256:17aca296801f44142e3ff2c16f93fbf6a87609ebb3704a9a41dd5d4903396caf", size = 44778 }, ] +[[package]] +name = "gevent" +version = "24.10.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi", marker = "platform_python_implementation == 'CPython' and sys_platform == 'win32'" }, + { name = "greenlet", marker = "platform_python_implementation == 'CPython'" }, + { name = "zope-event" }, + { name = "zope-interface" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/70/f0/be10ed5d7721ed2317d7feb59e167603217156c2a6d57f128523e24e673d/gevent-24.10.3.tar.gz", hash = "sha256:aa7ee1bd5cabb2b7ef35105f863b386c8d5e332f754b60cfc354148bd70d35d1", size = 6108837 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/6f/a2100e7883c7bdfc2b45cb60b310ca748762a21596258b9dd01c5c093dbc/gevent-24.10.3-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:d7a1ad0f2da582f5bd238bca067e1c6c482c30c15a6e4d14aaa3215cbb2232f3", size = 3014382 }, + { url = "https://files.pythonhosted.org/packages/7a/b1/460e4884ed6185d9eb9c4c2e9639d2b254197e46513301c0f63dec22dc90/gevent-24.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4e526fdc279c655c1e809b0c34b45844182c2a6b219802da5e411bd2cf5a8ad", size = 4853460 }, + { url = "https://files.pythonhosted.org/packages/ca/f6/7ded98760d381229183ecce8db2edcce96f13e23807d31a90c66dae85304/gevent-24.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57a5c4e0bdac482c5f02f240d0354e61362df73501ef6ebafce8ef635cad7527", size = 4977636 }, + { url = "https://files.pythonhosted.org/packages/7d/21/7b928e6029eedb93ef94fc0aee701f497af2e601f0ec00aac0e72e3f450e/gevent-24.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d67daed8383326dc8b5e58d88e148d29b6b52274a489e383530b0969ae7b9cb9", size = 5058031 }, + { url = "https://files.pythonhosted.org/packages/00/98/12c03fd004fbeeca01276ffc589f5a368fd741d02582ab7006d1bdef57e7/gevent-24.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e24ffea72e27987979c009536fd0868e52239b44afe6cf7135ce8aafd0f108e", size = 6683694 }, + { url = "https://files.pythonhosted.org/packages/64/4c/ea14d971452d3da09e49267e052d8312f112c7835120aed78d22ef14efee/gevent-24.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c1d80090485da1ea3d99205fe97908b31188c1f4857f08b333ffaf2de2e89d18", size = 5286063 }, + { url = "https://files.pythonhosted.org/packages/39/3f/397efff27e637d7306caa00d1560512c44028c25c70be1e72c46b79b1b66/gevent-24.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0c129f81d60cda614acb4b0c5731997ca05b031fb406fcb58ad53a7ade53b13", size = 6817462 }, + { url = "https://files.pythonhosted.org/packages/aa/5d/19939eaa7c5b7c0f37e0a0665a911ddfe1e35c25c512446fc356a065c16e/gevent-24.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:26ca7a6b42d35129617025ac801135118333cad75856ffc3217b38e707383eba", size = 1566631 }, + { url = 
"https://files.pythonhosted.org/packages/6e/01/1be5cf013826d8baae235976d6a94f3628014fd2db7c071aeec13f82b4d1/gevent-24.10.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:68c3a0d8402755eba7f69022e42e8021192a721ca8341908acc222ea597029b6", size = 2966909 }, + { url = "https://files.pythonhosted.org/packages/fe/3e/7fa9ab023f24d8689e2c77951981f8ea1f25089e0349a0bf8b35ee9b9277/gevent-24.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d850a453d66336272be4f1d3a8126777f3efdaea62d053b4829857f91e09755", size = 4913247 }, + { url = "https://files.pythonhosted.org/packages/db/63/6e40eaaa3c2abd1561faff11dc3e6781f8c25e975354b8835762834415af/gevent-24.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e58ee3723f1fbe07d66892f1caa7481c306f653a6829b6fd16cb23d618a5915", size = 5049036 }, + { url = "https://files.pythonhosted.org/packages/94/89/158bc32cdc898dda0481040ac18650022e73133d93460c5af56ca622fe9a/gevent-24.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b52382124eca13135a3abe4f65c6bd428656975980a48e51b17aeab68bdb14db", size = 5107299 }, + { url = "https://files.pythonhosted.org/packages/64/91/1abe62ee350fdfac186d33f615d0d3a0b3b140e7ccf23c73547aa0deec44/gevent-24.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ca2266e08f43c0e22c028801dff7d92a0b102ef20e4caeb6a46abfb95f6a328", size = 6819625 }, + { url = "https://files.pythonhosted.org/packages/92/8b/0b2fe0d36b7c4d463e46cc68eaf6c14488bd7d86cc37e995c64a0ff7d02f/gevent-24.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d758f0d4dbf32502ec87bb9b536ca8055090a16f8305f0ada3ce6f34e70f2fd7", size = 5474079 }, + { url = "https://files.pythonhosted.org/packages/12/7b/9f5abbf0021a50321314f850697e0f46d2e5081168223af2d8544af9d19f/gevent-24.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0de6eb3d55c03138fda567d9bfed28487ce5d0928c5107549767a93efdf2be26", size = 6901323 }, + { url = "https://files.pythonhosted.org/packages/8a/63/607715c621ae78ed581b7ba36d076df63feeb352993d521327f865056771/gevent-24.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:385710355eadecdb70428a5ae3e7e5a45dcf888baa1426884588be9d25ac4290", size = 1549468 }, + { url = "https://files.pythonhosted.org/packages/d9/e4/4edbe17001bb3e6fade4ad2d85ca8f9e4eabcbde4aa29aa6889281616e3e/gevent-24.10.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3ad8fb70aa0ebc935729c9699ac31b210a49b689a7b27b7ac9f91676475f3f53", size = 2970952 }, + { url = "https://files.pythonhosted.org/packages/3c/a6/ce0824fe9398ba6b00028a74840f12be1165d5feaacdc028ea953db3d6c3/gevent-24.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f18689f7a70d2ed0e75bad5036ec3c89690a493d4cfac8d7cdb258ac04b132bd", size = 5172230 }, + { url = "https://files.pythonhosted.org/packages/25/d4/9002cfb585bfa52c860ed4b1349d1a6400bdf2df9f1bd21df5ff33eea33c/gevent-24.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f4f171d4d2018170454d84c934842e1b5f6ce7468ba298f6e7f7cff15000a3", size = 5338394 }, + { url = "https://files.pythonhosted.org/packages/0c/98/222f1a14f22ad2d1cbcc37edb74095264c1f9c7ab49e6423693383462b8a/gevent-24.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7021e26d70189b33c27173d4173f27bf4685d6b6f1c0ea50e5335f8491cb110c", size = 5437989 }, + { url = 
"https://files.pythonhosted.org/packages/bf/e8/cbb46afea3c7ecdc7289e15cb4a6f89903f4f9754a27ca320d3e465abc78/gevent-24.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34aea15f9c79f27a8faeaa361bc1e72c773a9b54a1996a2ec4eefc8bcd59a824", size = 6838539 }, + { url = "https://files.pythonhosted.org/packages/69/c3/e43e348f23da404a6d4368a14453ed097cdfca97d5212eaceb987d04a0e1/gevent-24.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8af65a4d4feaec6042c666d22c322a310fba3b47e841ad52f724b9c3ce5da48e", size = 5513842 }, + { url = "https://files.pythonhosted.org/packages/c2/76/84b7c19c072a80900118717a85236859127d630cdf8b079fe42f19649f12/gevent-24.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:89c4115e3f5ada55f92b61701a46043fe42f702b5af863b029e4c1a76f6cc2d4", size = 6927374 }, + { url = "https://files.pythonhosted.org/packages/5e/69/0ab1b04c363547058fb5035275c144957b80b36cb6aee715fe6181b0cee9/gevent-24.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:1ce6dab94c0b0d24425ba55712de2f8c9cb21267150ca63f5bb3a0e1f165da99", size = 1546701 }, + { url = "https://files.pythonhosted.org/packages/f7/2d/c783583d7999cd2f2e7aa2d6a1c333d663003ca61255a89ff6a891be95f4/gevent-24.10.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:f147e38423fbe96e8731f60a63475b3d2cab2f3d10578d8ee9d10c507c58a2ff", size = 2962857 }, + { url = "https://files.pythonhosted.org/packages/f3/77/d3ce96fd49406f61976e9a3b6c742b97bb274d3b30c68ff190c5b5f81afd/gevent-24.10.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18e6984ec96fc95fd67488555c38ece3015be1f38b1bcceb27b7d6c36b343008", size = 5141676 }, + { url = "https://files.pythonhosted.org/packages/49/f4/f99f893770c316b9d2f03bd684947126cbed0321b89fe5423838974c2025/gevent-24.10.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:051b22e2758accfddb0457728bfc9abf8c3f2ce6bca43f1ff6e07b5ed9e49bf4", size = 5310248 }, + { url = "https://files.pythonhosted.org/packages/e3/0c/67257ba906f76ed82e8f0bd8c00c2a0687b360a1050b70db7e58dff749ab/gevent-24.10.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb5edb6433764119a664bbb148d2aea9990950aa89cc3498f475c2408d523ea3", size = 5407304 }, + { url = "https://files.pythonhosted.org/packages/35/6c/3a72da7c224b0111728130c0f1abc3ee07feff91b37e0ea83db98f4a3eaf/gevent-24.10.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce417bcaaab496bc9c77f75566531e9d93816262037b8b2dbb88b0fdcd66587c", size = 6818624 }, + { url = "https://files.pythonhosted.org/packages/a3/96/cc5f6ecba032a45fc312fe0db2908a893057fd81361eea93845d6c325556/gevent-24.10.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:1c3a828b033fb02b7c31da4d75014a1f82e6c072fc0523456569a57f8b025861", size = 5484356 }, + { url = "https://files.pythonhosted.org/packages/7c/97/e680b2b2f0c291ae4db9813ffbf02c22c2a0f14c8f1a613971385e29ef67/gevent-24.10.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f2ae3efbbd120cdf4a68b7abc27a37e61e6f443c5a06ec2c6ad94c37cd8471ec", size = 6903191 }, + { url = "https://files.pythonhosted.org/packages/1b/1c/b4181957da062d1c060974ec6cb798cc24aeeb28e8cd2ece84eb4b4991f7/gevent-24.10.3-cp313-cp313-win_amd64.whl", hash = "sha256:9e1210334a9bc9f76c3d008e0785ca62214f8a54e1325f6c2ecab3b6a572a015", size = 1545117 }, + { url = "https://files.pythonhosted.org/packages/89/2b/bf4af9950b8f9abd5b4025858f6311930de550e3498bbfeb47c914701a1d/gevent-24.10.3-pp310-pypy310_pp73-macosx_11_0_universal2.whl", hash = 
"sha256:e534e6a968d74463b11de6c9c67f4b4bf61775fb00f2e6e0f7fcdd412ceade18", size = 1271541 }, +] + [[package]] name = "googleapis-common-protos" version = "1.65.0" @@ -1013,6 +1523,57 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/08/49bfe7cf737952cc1a9c43e80cc258ed45dad7f183c5b8276fc94cb3862d/googleapis_common_protos-1.65.0-py2.py3-none-any.whl", hash = "sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63", size = 220890 }, ] +[[package]] +name = "greenlet" +version = "3.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/ff/df5fede753cc10f6a5be0931204ea30c35fa2f2ea7a35b25bdaf4fe40e46/greenlet-3.1.1.tar.gz", hash = "sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467", size = 186022 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/90/5234a78dc0ef6496a6eb97b67a42a8e96742a56f7dc808cb954a85390448/greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563", size = 271235 }, + { url = "https://files.pythonhosted.org/packages/7c/16/cd631fa0ab7d06ef06387135b7549fdcc77d8d859ed770a0d28e47b20972/greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83", size = 637168 }, + { url = "https://files.pythonhosted.org/packages/2f/b1/aed39043a6fec33c284a2c9abd63ce191f4f1a07319340ffc04d2ed3256f/greenlet-3.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36b89d13c49216cadb828db8dfa6ce86bbbc476a82d3a6c397f0efae0525bdd0", size = 648826 }, + { url = "https://files.pythonhosted.org/packages/76/25/40e0112f7f3ebe54e8e8ed91b2b9f970805143efef16d043dfc15e70f44b/greenlet-3.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b6150a85e1b33b40b1464a3f9988dcc5251d6ed06842abff82e42632fac120", size = 644443 }, + { url = "https://files.pythonhosted.org/packages/fb/2f/3850b867a9af519794784a7eeed1dd5bc68ffbcc5b28cef703711025fd0a/greenlet-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93147c513fac16385d1036b7e5b102c7fbbdb163d556b791f0f11eada7ba65dc", size = 643295 }, + { url = "https://files.pythonhosted.org/packages/cf/69/79e4d63b9387b48939096e25115b8af7cd8a90397a304f92436bcb21f5b2/greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da7a9bff22ce038e19bf62c4dd1ec8391062878710ded0a845bcf47cc0200617", size = 599544 }, + { url = "https://files.pythonhosted.org/packages/46/1d/44dbcb0e6c323bd6f71b8c2f4233766a5faf4b8948873225d34a0b7efa71/greenlet-3.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b2795058c23988728eec1f36a4e5e4ebad22f8320c85f3587b539b9ac84128d7", size = 1125456 }, + { url = "https://files.pythonhosted.org/packages/e0/1d/a305dce121838d0278cee39d5bb268c657f10a5363ae4b726848f833f1bb/greenlet-3.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ed10eac5830befbdd0c32f83e8aa6288361597550ba669b04c48f0f9a2c843c6", size = 1149111 }, + { url = "https://files.pythonhosted.org/packages/96/28/d62835fb33fb5652f2e98d34c44ad1a0feacc8b1d3f1aecab035f51f267d/greenlet-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:77c386de38a60d1dfb8e55b8c1101d68c79dfdd25c7095d51fec2dd800892b80", size = 298392 }, + { url = "https://files.pythonhosted.org/packages/28/62/1c2665558618553c42922ed47a4e6d6527e2fa3516a8256c2f431c5d0441/greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash 
= "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70", size = 272479 }, + { url = "https://files.pythonhosted.org/packages/76/9d/421e2d5f07285b6e4e3a676b016ca781f63cfe4a0cd8eaecf3fd6f7a71ae/greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159", size = 640404 }, + { url = "https://files.pythonhosted.org/packages/e5/de/6e05f5c59262a584e502dd3d261bbdd2c97ab5416cc9c0b91ea38932a901/greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e", size = 652813 }, + { url = "https://files.pythonhosted.org/packages/49/93/d5f93c84241acdea15a8fd329362c2c71c79e1a507c3f142a5d67ea435ae/greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1", size = 648517 }, + { url = "https://files.pythonhosted.org/packages/15/85/72f77fc02d00470c86a5c982b8daafdf65d38aefbbe441cebff3bf7037fc/greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383", size = 647831 }, + { url = "https://files.pythonhosted.org/packages/f7/4b/1c9695aa24f808e156c8f4813f685d975ca73c000c2a5056c514c64980f6/greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a", size = 602413 }, + { url = "https://files.pythonhosted.org/packages/76/70/ad6e5b31ef330f03b12559d19fda2606a522d3849cde46b24f223d6d1619/greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511", size = 1129619 }, + { url = "https://files.pythonhosted.org/packages/f4/fb/201e1b932e584066e0f0658b538e73c459b34d44b4bd4034f682423bc801/greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395", size = 1155198 }, + { url = "https://files.pythonhosted.org/packages/12/da/b9ed5e310bb8b89661b80cbcd4db5a067903bbcd7fc854923f5ebb4144f0/greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39", size = 298930 }, + { url = "https://files.pythonhosted.org/packages/7d/ec/bad1ac26764d26aa1353216fcbfa4670050f66d445448aafa227f8b16e80/greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d", size = 274260 }, + { url = "https://files.pythonhosted.org/packages/66/d4/c8c04958870f482459ab5956c2942c4ec35cac7fe245527f1039837c17a9/greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79", size = 649064 }, + { url = "https://files.pythonhosted.org/packages/51/41/467b12a8c7c1303d20abcca145db2be4e6cd50a951fa30af48b6ec607581/greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa", size = 663420 }, + { url = "https://files.pythonhosted.org/packages/27/8f/2a93cd9b1e7107d5c7b3b7816eeadcac2ebcaf6d6513df9abaf0334777f6/greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441", size = 658035 }, + { url = 
"https://files.pythonhosted.org/packages/57/5c/7c6f50cb12be092e1dccb2599be5a942c3416dbcfb76efcf54b3f8be4d8d/greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36", size = 660105 }, + { url = "https://files.pythonhosted.org/packages/f1/66/033e58a50fd9ec9df00a8671c74f1f3a320564c6415a4ed82a1c651654ba/greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9", size = 613077 }, + { url = "https://files.pythonhosted.org/packages/19/c5/36384a06f748044d06bdd8776e231fadf92fc896bd12cb1c9f5a1bda9578/greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0", size = 1135975 }, + { url = "https://files.pythonhosted.org/packages/38/f9/c0a0eb61bdf808d23266ecf1d63309f0e1471f284300ce6dac0ae1231881/greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942", size = 1163955 }, + { url = "https://files.pythonhosted.org/packages/43/21/a5d9df1d21514883333fc86584c07c2b49ba7c602e670b174bd73cfc9c7f/greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01", size = 299655 }, + { url = "https://files.pythonhosted.org/packages/f3/57/0db4940cd7bb461365ca8d6fd53e68254c9dbbcc2b452e69d0d41f10a85e/greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1", size = 272990 }, + { url = "https://files.pythonhosted.org/packages/1c/ec/423d113c9f74e5e402e175b157203e9102feeb7088cee844d735b28ef963/greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff", size = 649175 }, + { url = "https://files.pythonhosted.org/packages/a9/46/ddbd2db9ff209186b7b7c621d1432e2f21714adc988703dbdd0e65155c77/greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a", size = 663425 }, + { url = "https://files.pythonhosted.org/packages/bc/f9/9c82d6b2b04aa37e38e74f0c429aece5eeb02bab6e3b98e7db89b23d94c6/greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e", size = 657736 }, + { url = "https://files.pythonhosted.org/packages/d9/42/b87bc2a81e3a62c3de2b0d550bf91a86939442b7ff85abb94eec3fc0e6aa/greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4", size = 660347 }, + { url = "https://files.pythonhosted.org/packages/37/fa/71599c3fd06336cdc3eac52e6871cfebab4d9d70674a9a9e7a482c318e99/greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e", size = 615583 }, + { url = "https://files.pythonhosted.org/packages/4e/96/e9ef85de031703ee7a4483489b40cf307f93c1824a02e903106f2ea315fe/greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1", size = 1133039 }, + { url = 
"https://files.pythonhosted.org/packages/87/76/b2b6362accd69f2d1889db61a18c94bc743e961e3cab344c2effaa4b4a25/greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c", size = 1160716 }, + { url = "https://files.pythonhosted.org/packages/1f/1b/54336d876186920e185066d8c3024ad55f21d7cc3683c856127ddb7b13ce/greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761", size = 299490 }, + { url = "https://files.pythonhosted.org/packages/5f/17/bea55bf36990e1638a2af5ba10c1640273ef20f627962cf97107f1e5d637/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011", size = 643731 }, + { url = "https://files.pythonhosted.org/packages/78/d2/aa3d2157f9ab742a08e0fd8f77d4699f37c22adfbfeb0c610a186b5f75e0/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13", size = 649304 }, + { url = "https://files.pythonhosted.org/packages/f1/8e/d0aeffe69e53ccff5a28fa86f07ad1d2d2d6537a9506229431a2a02e2f15/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475", size = 646537 }, + { url = "https://files.pythonhosted.org/packages/05/79/e15408220bbb989469c8871062c97c6c9136770657ba779711b90870d867/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b", size = 642506 }, + { url = "https://files.pythonhosted.org/packages/18/87/470e01a940307796f1d25f8167b551a968540fbe0551c0ebb853cb527dd6/greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822", size = 602753 }, + { url = "https://files.pythonhosted.org/packages/e2/72/576815ba674eddc3c25028238f74d7b8068902b3968cbe456771b166455e/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01", size = 1122731 }, + { url = "https://files.pythonhosted.org/packages/ac/38/08cc303ddddc4b3d7c628c3039a61a3aae36c241ed01393d00c2fd663473/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6", size = 1142112 }, +] + [[package]] name = "h11" version = "0.14.0" @@ -1229,6 +1790,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/9f/5b5481d716670ed5fbd8d06dfa94b7108272b645da2f2406eb909cb6a450/libcst-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:4d6acb0bdee1e55b44c6215c59755ec4693ac01e74bb1fde04c37358b378835d", size = 2029600 }, ] +[[package]] +name = "lockfile" +version = "0.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/17/47/72cb04a58a35ec495f96984dddb48232b551aafb95bde614605b754fe6f7/lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799", size = 20874 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/22/9460e311f340cb62d26a38c419b1381b8593b0bb6b5d1f056938b086d362/lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa", size = 13564 }, +] + [[package]] name = "logfire" version = "1.2.0" 
@@ -1370,36 +1940,36 @@ wheels = [ [[package]] name = "mypy" -version = "1.12.1" +version = "1.13.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mypy-extensions" }, { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/17/03/744330105a74dc004578f47ec27e1bf66b1dd5664ea444d18423e41343bd/mypy-1.12.1.tar.gz", hash = "sha256:f5b3936f7a6d0e8280c9bdef94c7ce4847f5cdfc258fbb2c29a8c1711e8bb96d", size = 3150767 } +sdist = { url = "https://files.pythonhosted.org/packages/e8/21/7e9e523537991d145ab8a0a2fd98548d67646dc2aaaf6091c31ad883e7c1/mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e", size = 3152532 } wheels = [ - { url = "https://files.pythonhosted.org/packages/16/90/3a83d3bcff2eb85151723f116336bd545995b5260a49d3e0d95213fcc2d7/mypy-1.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3d7d4371829184e22fda4015278fbfdef0327a4b955a483012bd2d423a788801", size = 11017908 }, - { url = "https://files.pythonhosted.org/packages/e4/5c/d6b32ddde2460fc63168ca0f7bf44f38474353547f7c0304a30023c40aa0/mypy-1.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f59f1dfbf497d473201356966e353ef09d4daec48caeacc0254db8ef633a28a5", size = 10184164 }, - { url = "https://files.pythonhosted.org/packages/42/5e/680aa37c938e6db23bd7e6dd4d38d7e609998491721e453b32ec10d31e7f/mypy-1.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b947097fae68004b8328c55161ac9db7d3566abfef72d9d41b47a021c2fba6b1", size = 12587852 }, - { url = "https://files.pythonhosted.org/packages/9e/0f/9cafea1c3aaf852cfa1d4a387f33923b6d9714b5c16eb0469da67c5c31e4/mypy-1.12.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:96af62050971c5241afb4701c15189ea9507db89ad07794a4ee7b4e092dc0627", size = 13106489 }, - { url = "https://files.pythonhosted.org/packages/ea/c3/7f56d5d87a81e665de8dfa424120ab3a6954ae5854946cec0a46f78f6168/mypy-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:d90da248f4c2dba6c44ddcfea94bb361e491962f05f41990ff24dbd09969ce20", size = 9634753 }, - { url = "https://files.pythonhosted.org/packages/18/0a/70de7c97a86cb85535077ab5cef1cbc4e2812fd2e9cc21d78eb561a6b80f/mypy-1.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1230048fec1380faf240be6385e709c8570604d2d27ec6ca7e573e3bc09c3735", size = 10940998 }, - { url = "https://files.pythonhosted.org/packages/c0/97/9ed6d4834d7549936ab88533b302184fb568a0940c4000d2aaee6dc07112/mypy-1.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02dcfe270c6ea13338210908f8cadc8d31af0f04cee8ca996438fe6a97b4ec66", size = 10108523 }, - { url = "https://files.pythonhosted.org/packages/48/41/1686f37d09c915dfc5b683e20cc99dabac199900b5ca6d22747b99ddcb50/mypy-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a437c9102a6a252d9e3a63edc191a3aed5f2fcb786d614722ee3f4472e33f6", size = 12505553 }, - { url = "https://files.pythonhosted.org/packages/8d/2b/2dbcaa7e97b23f27ced77493256ee878f4a140ac750e198630ff1b9b60c6/mypy-1.12.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:186e0c8346efc027ee1f9acf5ca734425fc4f7dc2b60144f0fbe27cc19dc7931", size = 12988634 }, - { url = "https://files.pythonhosted.org/packages/54/55/710d082e91a2ccaea21214229b11f9215a9d22446f949491b5457655e82b/mypy-1.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:673ba1140a478b50e6d265c03391702fa11a5c5aff3f54d69a62a48da32cb811", size = 9630747 }, - { 
url = "https://files.pythonhosted.org/packages/8a/74/b9e0e4f06e951e277058f878302faa154d282ca11274c59fe08353f52949/mypy-1.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9fb83a7be97c498176fb7486cafbb81decccaef1ac339d837c377b0ce3743a7f", size = 11079902 }, - { url = "https://files.pythonhosted.org/packages/9f/62/fcad290769db3eb0de265094cef5c94d6075c70bc1e42b67eee4ca192dcc/mypy-1.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:389e307e333879c571029d5b93932cf838b811d3f5395ed1ad05086b52148fb0", size = 10072373 }, - { url = "https://files.pythonhosted.org/packages/cb/27/9ac78349c2952e4446288ec1174675ab9e0160ed18c2cb1154fa456c54e8/mypy-1.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:94b2048a95a21f7a9ebc9fbd075a4fcd310410d078aa0228dbbad7f71335e042", size = 12589779 }, - { url = "https://files.pythonhosted.org/packages/7c/4a/58cebd122cf1cba95680ac51303fbeb508392413ca64e3e711aa7d4877aa/mypy-1.12.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ee5932370ccf7ebf83f79d1c157a5929d7ea36313027b0d70a488493dc1b179", size = 13044459 }, - { url = "https://files.pythonhosted.org/packages/5b/c7/672935e2a3f9bcc07b1b870395a653f665657bef3cdaa504ad99f56eadf0/mypy-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:19bf51f87a295e7ab2894f1d8167622b063492d754e69c3c2fed6563268cb42a", size = 9731919 }, - { url = "https://files.pythonhosted.org/packages/bb/b0/092be5094840a401940c95224f63bb2a8f09bce9251ac1df180ec523830c/mypy-1.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d34167d43613ffb1d6c6cdc0cc043bb106cac0aa5d6a4171f77ab92a3c758bcc", size = 11068611 }, - { url = "https://files.pythonhosted.org/packages/9a/86/f20f53b8f062876c39602243d7a59b5cabd6b24315d8de511d607fa4de6a/mypy-1.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:427878aa54f2e2c5d8db31fa9010c599ed9f994b3b49e64ae9cd9990c40bd635", size = 10068036 }, - { url = "https://files.pythonhosted.org/packages/84/c7/1dbd6575785522da1d4c1ac2c419505fcf23bee74811880cac447a4a77ab/mypy-1.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5fcde63ea2c9f69d6be859a1e6dd35955e87fa81de95bc240143cf00de1f7f81", size = 12585671 }, - { url = "https://files.pythonhosted.org/packages/46/8a/f6ae18b446eb2bccce54c4bd94065bcfe417d6c67021dcc032bf1e720aff/mypy-1.12.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d54d840f6c052929f4a3d2aab2066af0f45a020b085fe0e40d4583db52aab4e4", size = 13036083 }, - { url = "https://files.pythonhosted.org/packages/59/e6/fc65fde3dc7156fce8d49ba21c7b1f5d866ad50467bf196ca94a7f6d2c9e/mypy-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:20db6eb1ca3d1de8ece00033b12f793f1ea9da767334b7e8c626a4872090cf02", size = 9735467 }, - { url = "https://files.pythonhosted.org/packages/84/6b/1db9de4e0764778251fb2d64cb7455cf6db75dc99c9f72c8b7e74b6a8a17/mypy-1.12.1-py3-none-any.whl", hash = "sha256:ce561a09e3bb9863ab77edf29ae3a50e65685ad74bba1431278185b7e5d5486e", size = 2646060 }, + { url = "https://files.pythonhosted.org/packages/5e/8c/206de95a27722b5b5a8c85ba3100467bd86299d92a4f71c6b9aa448bfa2f/mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a", size = 11020731 }, + { url = "https://files.pythonhosted.org/packages/ab/bb/b31695a29eea76b1569fd28b4ab141a1adc9842edde080d1e8e1776862c7/mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80", size = 10184276 }, + { url = 
"https://files.pythonhosted.org/packages/a5/2d/4a23849729bb27934a0e079c9c1aad912167d875c7b070382a408d459651/mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7", size = 12587706 }, + { url = "https://files.pythonhosted.org/packages/5c/c3/d318e38ada50255e22e23353a469c791379825240e71b0ad03e76ca07ae6/mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f", size = 13105586 }, + { url = "https://files.pythonhosted.org/packages/4a/25/3918bc64952370c3dbdbd8c82c363804678127815febd2925b7273d9482c/mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372", size = 9632318 }, + { url = "https://files.pythonhosted.org/packages/d0/19/de0822609e5b93d02579075248c7aa6ceaddcea92f00bf4ea8e4c22e3598/mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d", size = 10939027 }, + { url = "https://files.pythonhosted.org/packages/c8/71/6950fcc6ca84179137e4cbf7cf41e6b68b4a339a1f5d3e954f8c34e02d66/mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d", size = 10108699 }, + { url = "https://files.pythonhosted.org/packages/26/50/29d3e7dd166e74dc13d46050b23f7d6d7533acf48f5217663a3719db024e/mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b", size = 12506263 }, + { url = "https://files.pythonhosted.org/packages/3f/1d/676e76f07f7d5ddcd4227af3938a9c9640f293b7d8a44dd4ff41d4db25c1/mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73", size = 12984688 }, + { url = "https://files.pythonhosted.org/packages/9c/03/5a85a30ae5407b1d28fab51bd3e2103e52ad0918d1e68f02a7778669a307/mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca", size = 9626811 }, + { url = "https://files.pythonhosted.org/packages/fb/31/c526a7bd2e5c710ae47717c7a5f53f616db6d9097caf48ad650581e81748/mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5", size = 11077900 }, + { url = "https://files.pythonhosted.org/packages/83/67/b7419c6b503679d10bd26fc67529bc6a1f7a5f220bbb9f292dc10d33352f/mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e", size = 10074818 }, + { url = "https://files.pythonhosted.org/packages/ba/07/37d67048786ae84e6612575e173d713c9a05d0ae495dde1e68d972207d98/mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2", size = 12589275 }, + { url = "https://files.pythonhosted.org/packages/1f/17/b1018c6bb3e9f1ce3956722b3bf91bff86c1cefccca71cec05eae49d6d41/mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0", size = 13037783 }, + { url = "https://files.pythonhosted.org/packages/cb/32/cd540755579e54a88099aee0287086d996f5a24281a673f78a0e14dba150/mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2", size = 9726197 }, + { url = "https://files.pythonhosted.org/packages/11/bb/ab4cfdc562cad80418f077d8be9b4491ee4fb257440da951b85cbb0a639e/mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7", size = 11069721 }, + { url = "https://files.pythonhosted.org/packages/59/3b/a393b1607cb749ea2c621def5ba8c58308ff05e30d9dbdc7c15028bca111/mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62", size = 10063996 }, + { url = "https://files.pythonhosted.org/packages/d1/1f/6b76be289a5a521bb1caedc1f08e76ff17ab59061007f201a8a18cc514d1/mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8", size = 12584043 }, + { url = "https://files.pythonhosted.org/packages/a6/83/5a85c9a5976c6f96e3a5a7591aa28b4a6ca3a07e9e5ba0cec090c8b596d6/mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7", size = 13036996 }, + { url = "https://files.pythonhosted.org/packages/b4/59/c39a6f752f1f893fccbcf1bdd2aca67c79c842402b5283563d006a67cf76/mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc", size = 9737709 }, + { url = "https://files.pythonhosted.org/packages/3b/86/72ce7f57431d87a7ff17d442f521146a6585019eb8f4f31b7c02801f78ad/mypy-1.13.0-py3-none-any.whl", hash = "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a", size = 2647043 }, ] [[package]] @@ -1606,6 +2176,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124", size = 53985 }, ] +[[package]] +name = "paramiko" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bcrypt" }, + { name = "cryptography" }, + { name = "pynacl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/0f/c00296e36ff7485935b83d466c4f2cf5934b84b0ad14e81796e1d9d3609b/paramiko-3.5.0.tar.gz", hash = "sha256:ad11e540da4f55cedda52931f1a3f812a8238a7af7f62a60de538cd80bb28124", size = 1704305 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/66/14b2c030fcce69cba482d205c2d1462ca5c77303a263260dcb1192801c85/paramiko-3.5.0-py3-none-any.whl", hash = "sha256:1fedf06b085359051cd7d0d270cebe19e755a8a921cc2ddbfa647fb0cd7d68f9", size = 227143 }, +] + [[package]] name = "parso" version = "0.8.4" @@ -1629,11 +2213,11 @@ wheels = [ [[package]] name = "phonenumbers" -version = "8.13.47" +version = "8.13.48" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ae/0c/8f315d5e6ddea2e45ae13ada6936df6240858929881daf20cb3133fdb729/phonenumbers-8.13.47.tar.gz", hash = "sha256:53c5e7c6d431cafe4efdd44956078404ae9bc8b0eacc47be3105d3ccc88aaffa", size = 2297081 } +sdist = { url = "https://files.pythonhosted.org/packages/61/59/d01506a791481d26a640acb0a1124e3f0a816b0711e563962d7d55184890/phonenumbers-8.13.48.tar.gz", hash = "sha256:62d8df9b0f3c3c41571c6b396f044ddd999d61631534001b8be7fdf7ba1b18f3", size = 2297098 } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/b6/0b/5cde445764ac72460748107e999b026b7245e3fcc5fd5551cc5aff45e469/phonenumbers-8.13.47-py2.py3-none-any.whl", hash = "sha256:5d3c0142ef7055ca5551884352e3b6b93bfe002a0bc95b8eaba39b0e2184541b", size = 2582530 }, + { url = "https://files.pythonhosted.org/packages/98/f4/a9340f98335ae6fab1ad4b56b6a04f390de65bea371c71b0cdf67e4c08d0/phonenumbers-8.13.48-py2.py3-none-any.whl", hash = "sha256:5c51939acefa390eb74119750afb10a85d3c628dc83fd62c52d6f532fcf5d205", size = 2582542 }, ] [[package]] @@ -1881,16 +2465,41 @@ wheels = [ [[package]] name = "pydantic-pkgr" version = "0.5.4" -source = { registry = "https://pypi.org/simple" } +source = { editable = "packages/pydantic-pkgr" } dependencies = [ { name = "platformdirs" }, { name = "pydantic" }, { name = "pydantic-core" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d2/18/3bf29e213c4a19d5b08e0fa1048c72f76c54565a208cced1fd4a60f989fc/pydantic_pkgr-0.5.4.tar.gz", hash = "sha256:e3487b46357b1e1b729363385590355cfac261b18ed207f59e9b613c5a8d45b2", size = 42408 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/01/97/9ec8d45e4af1a3af7d0ca78e12bcb1d74a446399034cb1514aab2bac056e/pydantic_pkgr-0.5.4-py3-none-any.whl", hash = "sha256:46ad1ad5954ee9c55b2c2f2c2be749a39992a89edde624454e63d8a7b550be8b", size = 45061 }, + +[package.optional-dependencies] +all = [ + { name = "ansible" }, + { name = "ansible-core" }, + { name = "ansible-runner" }, + { name = "pyinfra" }, +] +ansible = [ + { name = "ansible" }, + { name = "ansible-core" }, + { name = "ansible-runner" }, +] +pyinfra = [ + { name = "pyinfra" }, +] + +[package.metadata] +requires-dist = [ + { name = "ansible", marker = "extra == 'ansible'", specifier = ">=10.5.0" }, + { name = "ansible-core", marker = "extra == 'ansible'", specifier = ">=2.17.5" }, + { name = "ansible-runner", marker = "extra == 'ansible'", specifier = ">=2.4.0" }, + { name = "platformdirs", specifier = ">=4.3.6" }, + { name = "pydantic", specifier = ">=2.7.1" }, + { name = "pydantic-core", specifier = ">=2.18.2" }, + { name = "pydantic-pkgr", extras = ["pyinfra", "ansible"], marker = "extra == 'all'", editable = "packages/pydantic-pkgr" }, + { name = "pyinfra", marker = "extra == 'pyinfra'", specifier = ">=2.6.1" }, + { name = "typing-extensions", specifier = ">=4.11.0" }, ] [[package]] @@ -1924,6 +2533,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a", size = 1205513 }, ] +[[package]] +name = "pyinfra" +version = "3.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "configparser" }, + { name = "distro" }, + { name = "gevent" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "paramiko" }, + { name = "python-dateutil" }, + { name = "pywinrm" }, + { name = "setuptools" }, + { name = "typeguard" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/12/1c/bb923dcd1ee29272e31986ef5f64e91b586a0c685efe82672f6cf468e96d/pyinfra-3.1.1.tar.gz", hash = "sha256:5209a05897597c8747511bb559809a64a84377ae77424d3869d46583f95f2f30", size = 198499 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/a2/56/cf53e42877039d13c3e07d63a38ce28e2cc4dca167a2cdc5420f2766f95a/pyinfra-3.1.1-py2.py3-none-any.whl", hash = "sha256:c87c75fcc03197ce84cb078838e225669be5cc0c4d4e52e408a9e774a3d183f6", size = 255376 }, +] + +[[package]] +name = "pynacl" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920 }, + { url = "https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722 }, + { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087 }, + { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678 }, + { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660 }, + { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824 }, + { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912 }, + { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624 }, + { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, +] + [[package]] name = "pyopenssl" version = "24.2.1" @@ -1936,6 +2588,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/dd/e0aa7ebef5168c75b772eda64978c597a9129b46be17779054652a7999e4/pyOpenSSL-24.2.1-py3-none-any.whl", hash = "sha256:967d5719b12b243588573f39b0c677637145c7a1ffedcd495a487e58177fbb8d", size = 58390 }, ] +[[package]] +name = "pyspnego" +version = 
"0.11.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "sspilib", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/46/f5/1f938a781742d18475ac43a101ec8a9499e1655da0984e08b59e20012c04/pyspnego-0.11.1.tar.gz", hash = "sha256:e92ed8b0a62765b9d6abbb86a48cf871228ddb97678598dc01c9c39a626823f6", size = 225697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/c3/4dc3d1d029e14bf065f1df9e98e3e503e622de34706a06ab6c3731377e85/pyspnego-0.11.1-py3-none-any.whl", hash = "sha256:129a4294f2c4d681d5875240ef87accc6f1d921e8983737fb0b59642b397951e", size = 130456 }, +] + [[package]] name = "pytest" version = "8.3.3" @@ -1995,6 +2660,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/91/832fb3b3a1f62bd2ab4924f6be0c7736c9bc4f84d3b153b74efcf6d4e4a1/python_crontab-3.2.0-py3-none-any.whl", hash = "sha256:82cb9b6a312d41ff66fd3caf3eed7115c28c195bfb50711bc2b4b9592feb9fe5", size = 27351 }, ] +[[package]] +name = "python-daemon" +version = "3.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "lockfile" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/54/cd/d62884732e5d6ff6906234169d06338d53e37243c60cf73679c8942f9e42/python_daemon-3.1.0.tar.gz", hash = "sha256:fdb621d7e5f46e74b4de1ad6b0fff6e69cd91b4f219de1476190ebdd0f4781df", size = 61947 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/78/09ce91de8b31930c415d7439fa4f9d00d25af57135c16358c0b5b0ae0dea/python_daemon-3.1.0-py3-none-any.whl", hash = "sha256:a66b5896f0aed5807a25c6128268eb496488b1f9c6927c487710049ba16be32a", size = 30899 }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -2065,6 +2743,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725", size = 508002 }, ] +[[package]] +name = "pywinrm" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, + { name = "requests-ntlm" }, + { name = "xmltodict" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5a/2f/d835c342c4b11e28beaccef74982e7669986c84bf19654c39f53c8b8243c/pywinrm-0.5.0.tar.gz", hash = "sha256:5428eb1e494af7954546cd4ff15c9ef1a30a75e05b25a39fd606cef22201e9f1", size = 40875 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/45/4340320145c225387f40ce412de1b209d991c322032e4922cc0a9935fd31/pywinrm-0.5.0-py3-none-any.whl", hash = "sha256:c267046d281de613fc7c8a528cdd261564d9b99bdb7c2926221eff3263b700c8", size = 48182 }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -2207,6 +2899,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, ] +[[package]] +name = "requests-ntlm" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "pyspnego" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/15/74/5d4e1815107e9d78c44c3ad04740b00efd1189e5a9ec11e5275b60864e54/requests_ntlm-1.3.0.tar.gz", hash = 
"sha256:b29cc2462623dffdf9b88c43e180ccb735b4007228a542220e882c58ae56c668", size = 16112 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/5d/836b97537a390cf811b0488490c389c5a614f0a93acb23f347bd37a2d914/requests_ntlm-1.3.0-py3-none-any.whl", hash = "sha256:4c7534a7d0e482bb0928531d621be4b2c74ace437e88c5a357ceb7452d25a510", size = 6577 }, +] + [[package]] name = "requests-tracker" version = "0.3.3" @@ -2220,18 +2926,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/f5/d2fd9443c1839edf0c17216e9ab03201c16468e82e2968504fc738cd6917/requests_tracker-0.3.3-py3-none-any.whl", hash = "sha256:31d8924470ceea34be51743142c5248f1bf625d2ff95d1f0dccc2cfe14ecda0b", size = 58078 }, ] +[[package]] +name = "resolvelib" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ce/10/f699366ce577423cbc3df3280063099054c23df70856465080798c6ebad6/resolvelib-1.0.1.tar.gz", hash = "sha256:04ce76cbd63fded2078ce224785da6ecd42b9564b1390793f64ddecbe997b309", size = 21065 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/fc/e9ccf0521607bcd244aa0b3fbd574f71b65e9ce6a112c83af988bbbe2e23/resolvelib-1.0.1-py2.py3-none-any.whl", hash = "sha256:d2da45d1a8dfee81bdd591647783e340ef3bcb104b54c383f70d422ef5cc7dbf", size = 17194 }, +] + [[package]] name = "rich" -version = "13.9.2" +version = "13.9.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/aa/9e/1784d15b057b0075e5136445aaea92d23955aad2c93eaede673718a40d95/rich-13.9.2.tar.gz", hash = "sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c", size = 222843 } +sdist = { url = "https://files.pythonhosted.org/packages/d9/e9/cf9ef5245d835065e6673781dbd4b8911d352fb770d56cf0879cf11b7ee1/rich-13.9.3.tar.gz", hash = "sha256:bc1e01b899537598cf02579d2b9f4a415104d3fc439313a7a2c165d76557a08e", size = 222889 } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/91/5474b84e505a6ccc295b2d322d90ff6aa0746745717839ee0c5fb4fdcceb/rich-13.9.2-py3-none-any.whl", hash = "sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1", size = 242117 }, + { url = "https://files.pythonhosted.org/packages/9a/e2/10e9819cf4a20bd8ea2f5dabafc2e6bf4a78d6a0965daeb60a4b34d1c11f/rich-13.9.3-py3-none-any.whl", hash = "sha256:9836f5096eb2172c9e77df411c1b009bace4193d6a481d534fea75ebba758283", size = 242157 }, ] [[package]] @@ -2463,6 +3178,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/a5/b2860373aa8de1e626b2bdfdd6df4355f0565b47e51f7d0c54fe70faf8fe/sqlparse-0.5.1-py3-none-any.whl", hash = "sha256:773dcbf9a5ab44a090f3441e2180efe2560220203dc2f8c0b0fa141e18b505e4", size = 44156 }, ] +[[package]] +name = "sspilib" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/36/48/8d634ac9aa5404b77f2d66b5a354751b7bbbf2be2947328fe895034cb750/sspilib-0.2.0.tar.gz", hash = "sha256:4d6cd4290ca82f40705efeb5e9107f7abcd5e647cb201a3d04371305938615b8", size = 55815 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/ac/b59283a2a0c91ef136f4979d711cd8dcd005b9f18b4a50ffaaa50e00f200/sspilib-0.2.0-cp310-cp310-win32.whl", hash = "sha256:e436fa09bcf353a364a74b3ef6910d936fa8cd1493f136e517a9a7e11b319c57", size = 487673 }, + { url = 
"https://files.pythonhosted.org/packages/c5/bc/84cb16b512902b972cfd89130918f01aabb8016814442ff6bd2cf89d6530/sspilib-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:850a17c98d2b8579b183ce37a8df97d050bc5b31ab13f5a6d9e39c9692fe3754", size = 565326 }, + { url = "https://files.pythonhosted.org/packages/c5/0d/d15fe0e5c87a51b7d693e889656816fd8d67995fbd072ab9852934e9ecf4/sspilib-0.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:a4d788a53b8db6d1caafba36887d5ac2087e6b6be6f01eb48f8afea6b646dbb5", size = 473562 }, + { url = "https://files.pythonhosted.org/packages/70/16/c31487f432724813a27f30c1a63ec07217adf65572e33fe9c4dcfd47a1b3/sspilib-0.2.0-cp311-cp311-win32.whl", hash = "sha256:400d5922c2c2261009921157c4b43d868e84640ad86e4dc84c95b07e5cc38ac6", size = 485419 }, + { url = "https://files.pythonhosted.org/packages/15/e9/0cb63b7f1014eff9c1a5b83920a423080b10f29ddf0264fced6abbdbad28/sspilib-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3e7d19c16ba9189ef8687b591503db06cfb9c5eb32ab1ca3bb9ebc1a8a5f35c", size = 564816 }, + { url = "https://files.pythonhosted.org/packages/b9/d9/3b8295f652afe71c0cdfd731eb7d37cc13a8adbfeacd3d67606d486d79b2/sspilib-0.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:f65c52ead8ce95eb78a79306fe4269ee572ef3e4dcc108d250d5933da2455ecc", size = 472529 }, + { url = "https://files.pythonhosted.org/packages/a9/82/07a49f00c0e7feff26f288b5f0747add197fc0db1ddddfab5fd5bdd94bdf/sspilib-0.2.0-cp312-cp312-win32.whl", hash = "sha256:bdf9a4f424add02951e1f01f47441d2e69a9910471e99c2c88660bd8e184d7f8", size = 487318 }, + { url = "https://files.pythonhosted.org/packages/38/54/949a9e9c07cd6efead79a7f78cc951cb5fa4f9f1fcb25b8520fd2adcdbe0/sspilib-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:40a97ca83e503a175d1dc9461836994e47e8b9bcf56cab81a2c22e27f1993079", size = 569220 }, + { url = "https://files.pythonhosted.org/packages/8f/52/c7a16472e9582474626f48ec79a821f66e5698cf5552baf923dfc636989e/sspilib-0.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:8ffc09819a37005c66a580ff44f544775f9745d5ed1ceeb37df4e5ff128adf36", size = 471371 }, + { url = "https://files.pythonhosted.org/packages/bc/9c/8784d3afe27c2f68620ea60fa2b6347100694db35193ba42714bdf23f882/sspilib-0.2.0-cp313-cp313-win32.whl", hash = "sha256:b9044d6020aa88d512e7557694fe734a243801f9a6874e1c214451eebe493d92", size = 483600 }, + { url = "https://files.pythonhosted.org/packages/49/ad/40f898075c913c75060c17c9cc6d6b86e8f83b6f5e1e017627b07ff53fcd/sspilib-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:c39a698491f43618efca8776a40fb7201d08c415c507f899f0df5ada15abefaa", size = 563678 }, + { url = "https://files.pythonhosted.org/packages/dd/84/3232ee82e33e426cd9e2011111a3136e5715428f0331a6739930b530333a/sspilib-0.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:863b7b214517b09367511c0ef931370f0386ed2c7c5613092bf9b106114c4a0e", size = 469030 }, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -2559,6 +3294,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/6c/a53cc9a97c2da76d9cd83c03f377468599a28f2d4ad9fc71c3b99640e71e/txaio-23.1.1-py2.py3-none-any.whl", hash = "sha256:aaea42f8aad50e0ecfb976130ada140797e9dcb85fad2cf72b0f37f8cefcb490", size = 30512 }, ] +[[package]] +name = "typeguard" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8d/e1/3178b3e5369a98239ed7301e3946747048c66f4023163d55918f11b82d4e/typeguard-4.3.0.tar.gz", hash = 
"sha256:92ee6a0aec9135181eae6067ebd617fd9de8d75d714fb548728a4933b1dea651", size = 73374 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/de/be0ba39ee73760bf33329b7c6f95bc67e96593c69c881671e312538e24bb/typeguard-4.3.0-py3-none-any.whl", hash = "sha256:4d24c5b39a117f8a895b9da7a9b3114f04eb63bade45a4492de49b175b6f7dfa", size = 35385 }, +] + [[package]] name = "typeid-python" version = "0.3.1" @@ -2639,27 +3386,27 @@ wheels = [ [[package]] name = "uv" -version = "0.4.25" +version = "0.4.26" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d0/bc/1a013408b7f9f437385705652f404b6b15127ecf108327d13be493bdfb81/uv-0.4.25.tar.gz", hash = "sha256:d39077cdfe3246885fcdf32e7066ae731a166101d063629f9cea08738f79e6a3", size = 2064863 } +sdist = { url = "https://files.pythonhosted.org/packages/cb/90/500da91a6d2fdad8060d27b0c2dd948bb807a7cfc5fe32abc90dfaeb363f/uv-0.4.26.tar.gz", hash = "sha256:e9f45d8765a037a13ddedebb9e36fdcf06b7957654cfa8055d84f19eba12957e", size = 2072287 } wheels = [ - { url = "https://files.pythonhosted.org/packages/84/18/9c9056d373620b1cf5182ce9b2d258e86d117d667cf8883e12870f2a5edf/uv-0.4.25-py3-none-linux_armv6l.whl", hash = "sha256:94fb2b454afa6bdfeeea4b4581c878944ca9cf3a13712e6762f245f5fbaaf952", size = 13028246 }, - { url = "https://files.pythonhosted.org/packages/a1/19/8a3f09aba30ac5433dfecde55d5241a07c96bb12340c3b810bc58188a12e/uv-0.4.25-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a7c3a18c20ddb527d296d1222bddf42b78031c50b5b4609d426569b5fb61f5b0", size = 13175265 }, - { url = "https://files.pythonhosted.org/packages/e8/c9/2f924bb29bd53c51b839c1c6126bd2cf4c451d4a7d8f34be078f9e31c57e/uv-0.4.25-py3-none-macosx_11_0_arm64.whl", hash = "sha256:18100f0f36419a154306ed6211e3490bf18384cdf3f1a0950848bf64b62fa251", size = 12255610 }, - { url = "https://files.pythonhosted.org/packages/b2/5a/d8f8971aeb3389679505cf633a786cd72a96ce232f80f14cfe5a693b4c64/uv-0.4.25-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:6e981b1465e30102e41946adede9cb08051a5d70c6daf09f91a7ea84f0b75c08", size = 12506511 }, - { url = "https://files.pythonhosted.org/packages/e3/96/8c73520daeba5022cec8749e44afd4ca9ef774bf728af9c258bddec3577f/uv-0.4.25-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:578ae385fad6bd6f3868828e33d54994c716b315b1bc49106ec1f54c640837e4", size = 12836250 }, - { url = "https://files.pythonhosted.org/packages/67/3d/b0e810d365fb154fe1d380a0f43ee35a683cf9162f2501396d711bec2621/uv-0.4.25-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d29a78f011ecc2f31c13605acb6574c2894c06d258b0f8d0dbb899986800450", size = 13521303 }, - { url = "https://files.pythonhosted.org/packages/2d/f4/dd3830ec7fc6e7e5237c184f30f2dbfed4f93605e472147eca1373bcc72b/uv-0.4.25-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ec181be2bda10651a3558156409ac481549983e0276d0e3645e3b1464e7f8715", size = 14105308 }, - { url = "https://files.pythonhosted.org/packages/f4/4e/0fca02f8681e4870beda172552e747e0424f6e9186546b00a5e92525fea9/uv-0.4.25-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50c7d0d9e7f392f81b13bf3b7e37768d1486f2fc9d533a54982aa0ed11e4db23", size = 13859475 }, - { url = "https://files.pythonhosted.org/packages/33/07/1100e9bc652f2850930f466869515d16ffe9582aaaaa99bac332ebdfe3ea/uv-0.4.25-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:2fc35b5273f1e018aecd66b70e0fd7d2eb6698853dde3e2fc644e7ebf9f825b1", size = 18100840 }, - { url = "https://files.pythonhosted.org/packages/fa/98/ba1cb7dd2aa639a064a9e49721e08f12a3424456d60dde1327e7c6437930/uv-0.4.25-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7022a71ff63a3838796f40e954b76bf7820fc27e96fe002c537e75ff8e34f1d", size = 13645464 }, - { url = "https://files.pythonhosted.org/packages/0d/05/b97fb8c828a070e8291826922b2712d1146b11563b4860bc9ba80f5635d1/uv-0.4.25-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:e02afb0f6d4b58718347f7d7cfa5a801e985ce42181ba971ed85ef149f6658ca", size = 12694995 }, - { url = "https://files.pythonhosted.org/packages/b3/97/63df050811379130202898f60e735a1a331ba3a93b8aa1e9bb466f533913/uv-0.4.25-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:3d7680795ea78cdbabbcce73d039b2651cf1fa635ddc1aa3082660f6d6255c50", size = 12831737 }, - { url = "https://files.pythonhosted.org/packages/dc/e0/08352dcffa6e8435328861ea60b2c05e8bd030f1e93998443ba66209db7b/uv-0.4.25-py3-none-musllinux_1_1_i686.whl", hash = "sha256:aae9dcafd20d5ba978c8a4939ab942e8e2e155c109e9945207fbbd81d2892c9e", size = 13273529 }, - { url = "https://files.pythonhosted.org/packages/25/f4/eaf95e5eee4e2e69884df0953d094deae07216f72068ef1df08c0f49841d/uv-0.4.25-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:4c55040e67470f2b73e95e432aba06f103a0b348ea0b9c6689b1029c8d9e89fd", size = 15039860 }, - { url = "https://files.pythonhosted.org/packages/69/04/482b1cc9e8d599c7d766c4ba2d7a512ed3989921443792f92f26b8d44fe6/uv-0.4.25-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:bdbfd0c476b9e80a3f89af96aed6dd7d2782646311317a9c72614ccce99bb2ad", size = 13776302 }, - { url = "https://files.pythonhosted.org/packages/cd/7e/3d1cb735cc3df6341ac884b73eeec1f51a29192721be40be8e9b1d82666d/uv-0.4.25-py3-none-win32.whl", hash = "sha256:7d266e02fefef930609328c31c075084295c3cb472bab3f69549fad4fd9d82b3", size = 12970553 }, - { url = "https://files.pythonhosted.org/packages/04/e9/c00d2bb4a286b13fad0f06488ea9cbe9e76d0efcd81e7a907f72195d5b83/uv-0.4.25-py3-none-win_amd64.whl", hash = "sha256:be2a4fc4fcade9ea5e67e51738c95644360d6e59b6394b74fc579fb617f902f7", size = 14702875 }, + { url = "https://files.pythonhosted.org/packages/bf/1f/1e1af6656e83a9b0347c22328ad6d899760819e5f19fa80aee88b56d1e02/uv-0.4.26-py3-none-linux_armv6l.whl", hash = "sha256:d1ca5183afab454f28573a286811019b3552625af2cd1cd3996049d3bbfdb1ca", size = 13055731 }, + { url = "https://files.pythonhosted.org/packages/92/27/2235628adcf468bc6be98b84e509afa54240d359b4705454e7e957a9650d/uv-0.4.26-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:391a6f5e31b212cb72a8f460493bbdf4088e66049666ad064ac8530230031289", size = 13230933 }, + { url = "https://files.pythonhosted.org/packages/36/ce/dd9b312c2230705119d3de910a32bbd32dc500bf147c7a0076a31bdfd153/uv-0.4.26-py3-none-macosx_11_0_arm64.whl", hash = "sha256:acaa25b304db6f1e8064d3280532ecb80a58346e37f4199659269847848c4da0", size = 12266060 }, + { url = "https://files.pythonhosted.org/packages/4d/64/ef6532d84841f5e77e240df9a7dbdc3ca5bf45fae323f247b7bd57bea037/uv-0.4.26-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:2ddb60d508b668b8da055651b30ff56c1efb79d57b064c218a7622b5c74b2af8", size = 12539139 }, + { url = "https://files.pythonhosted.org/packages/1b/30/b4f98f5e28a8c41e370be1a6ef9d48a619e20d3caeb2bf437f1560fab2df/uv-0.4.26-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:6f66f11e088d231b7e305f089dc949b0e6b1d65e0a877b50ba5c3ae26e151144", size = 12867987 }, + { url = "https://files.pythonhosted.org/packages/7f/5f/605fe50a0710a78013ad5b2b1034d8f056b5971fc023b6510a24e9350637/uv-0.4.26-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e086ebe200e9718e9622af405d45caad9d84b60824306fcb220335fe6fc90966", size = 13594669 }, + { url = "https://files.pythonhosted.org/packages/ae/4b/e3d02b963f9f83f76d1b0757204a210aceebe8ae16f69fcb431b09bc3926/uv-0.4.26-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:41f9876c22ad5b4518bffe9e50ec7169e242b64f139cdcaf42a76f70a9bd5c78", size = 14156314 }, + { url = "https://files.pythonhosted.org/packages/40/8e/7803d3b76d8694ba939509e49d0c37e70a6d580ef5b7f0242701533920e5/uv-0.4.26-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6091075420eda571b0377d351c393b096514cb036a3199e033e003edaa0ff880", size = 13897243 }, + { url = "https://files.pythonhosted.org/packages/97/ee/8d5b63b590d3cb9dae5ac396cc099dcad2e368794d77e34a52dd896e5d8e/uv-0.4.26-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1214caacc6b9f9c72749634c7a82a5d93123a44b70a1fa6a9d13993c126ca33e", size = 17961411 }, + { url = "https://files.pythonhosted.org/packages/da/9a/5a6a3ea6c2bc42904343897b666cb8c9ac921bf9551b463aeb592cd49d45/uv-0.4.26-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a63a6fe6f249a9fff72328204c3e6b457aae5914590e6881b9b39dcc72d24df", size = 13700388 }, + { url = "https://files.pythonhosted.org/packages/33/52/009ea704318c5d0f290fb2ea4e1874d5625a60b290c6e5e49aae4d140091/uv-0.4.26-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:c4c69532cb4d0c1e160883142b8bf0133a5a67e9aed5148e13743ae55c2dfc03", size = 12702036 }, + { url = "https://files.pythonhosted.org/packages/72/38/4dc590872e5c1810c6ec203d9b070278ed396a1ebf3396e556079946c894/uv-0.4.26-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:9560c2eb234ea92276bbc647854d4a9e75556981c1193c3cc59f6613f7d177f2", size = 12854127 }, + { url = "https://files.pythonhosted.org/packages/76/73/124820b37d1c8784fbebfc4b5b7812b4fa8e4e680c35b77a38be444dac9f/uv-0.4.26-py3-none-musllinux_1_1_i686.whl", hash = "sha256:a41bdd09b9a3ddc8f459c73e924485e1caae43e43305cedb65f5feac05cf184a", size = 13309009 }, + { url = "https://files.pythonhosted.org/packages/f4/e7/37cf24861c6f76ba85ac80c15c391848524668be8dcd218ed04da80a96b6/uv-0.4.26-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:23cee82020b9e973a5feba81c2cf359a5a09020216d98534926f45ee7b74521d", size = 15079442 }, + { url = "https://files.pythonhosted.org/packages/ca/ac/fa29079ee0c26c65efca5c447ef6ce66f0afca1f73c09d599229d2d9dfd4/uv-0.4.26-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:468f806e841229c0bd6e1cffaaffc064720704623890cee15b42b877cef748c5", size = 13827888 }, + { url = "https://files.pythonhosted.org/packages/40/e8/f9824ecb8b13da5e8b0e9b8fbc81edb9e0d41923ebc6e287ae2e5a04bc62/uv-0.4.26-py3-none-win32.whl", hash = "sha256:70a108399d6c9e3d1f4a0f105d6d016f97f292dbb6c724e1ed2e6dc9f6872c79", size = 13092190 }, + { url = "https://files.pythonhosted.org/packages/46/91/c76682177dbe46dc0cc9221f9483b186ad3d8e0b59056c2cdae5c011609c/uv-0.4.26-py3-none-win_amd64.whl", hash = "sha256:e826b544020ef407387ed734a89850cac011ee4b5daf94b4f616b71eff2c8a94", size = 14757412 }, ] [[package]] @@ -2862,7 +3609,7 @@ wheels = [ [[package]] name = "yt-dlp" -version = "2024.10.7" +version = "2024.10.22" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = 
"brotli", marker = "implementation_name == 'cpython'" }, @@ -2874,9 +3621,9 @@ dependencies = [ { name = "urllib3" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2e/b1/08679efb4c1932dc6420deda8a89f03d7440d6462b7f61d339db2732a497/yt_dlp-2024.10.7.tar.gz", hash = "sha256:0baf1ab517c9748d7e337ced91c5543c36fc16246a9ebedac32ebf20c1998ceb", size = 2877443 } +sdist = { url = "https://files.pythonhosted.org/packages/2f/79/acfe1c2bf64ed83e1b465e6550c0f5bc2214ea447a900b102f5ca6e4186e/yt_dlp-2024.10.22.tar.gz", hash = "sha256:47b82a1fd22411b5c95ef2f0a1ae1af4e6dfd736ea99fdb2a0ea41445abc62ba", size = 2885622 } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/91/ecb07d66110334cdb01e94b187577af3b041897090203c9957728825d46f/yt_dlp-2024.10.7-py3-none-any.whl", hash = "sha256:9e336ae663bfd7ad3ea1c02e722747388172719efc0fc39a807dace3073aa704", size = 3149082 }, + { url = "https://files.pythonhosted.org/packages/bb/68/548f9819b41d53561d4f3d39588111cf39993c066b6e5300b4ae118eb2e6/yt_dlp-2024.10.22-py3-none-any.whl", hash = "sha256:ba166602ebe22a220e4dc1ead45bf00eb469ed812b22f4fb8bb54734f9b02084", size = 3155189 }, ] [[package]] @@ -2889,36 +3636,48 @@ wheels = [ ] [[package]] -name = "zope-interface" -version = "7.1.0" +name = "zope-event" +version = "5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "setuptools" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e4/1f/8bb0739aba9a8909bcfa2e12dc20443ebd5bd773b6796603f1a126211e18/zope_interface-7.1.0.tar.gz", hash = "sha256:3f005869a1a05e368965adb2075f97f8ee9a26c61898a9e52a9764d93774f237", size = 300239 } +sdist = { url = "https://files.pythonhosted.org/packages/46/c2/427f1867bb96555d1d34342f1dd97f8c420966ab564d58d18469a1db8736/zope.event-5.0.tar.gz", hash = "sha256:bac440d8d9891b4068e2b5a2c5e2c9765a9df762944bda6955f96bb9b91e67cd", size = 17350 } wheels = [ - { url = "https://files.pythonhosted.org/packages/52/cf/6fe78d1748ade8bde9e0afa0b7a6dc53427fa817c44c0c67937f4a3890ca/zope.interface-7.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2bd9e9f366a5df08ebbdc159f8224904c1c5ce63893984abb76954e6fbe4381a", size = 207992 }, - { url = "https://files.pythonhosted.org/packages/98/6a/7583a3bf0ba508d7454b69928ced99f516af674be7a2781d681bbdf3e439/zope.interface-7.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:661d5df403cd3c5b8699ac480fa7f58047a3253b029db690efa0c3cf209993ef", size = 208498 }, - { url = "https://files.pythonhosted.org/packages/f2/d7/acae0a46ff4494ade2478335aeb2dec2ec024b7761915b82887cb04f207d/zope.interface-7.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91b6c30689cfd87c8f264acb2fc16ad6b3c72caba2aec1bf189314cf1a84ca33", size = 254730 }, - { url = "https://files.pythonhosted.org/packages/76/78/42201e0e6150a14d6aaf138f969186a89ec31d25a5860b7c054191cfefa6/zope.interface-7.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b6a4924f5bad9fe21d99f66a07da60d75696a136162427951ec3cb223a5570d", size = 249135 }, - { url = "https://files.pythonhosted.org/packages/3f/1e/a2bb69085db973bc936493e1a870c708b4e61496c4c1f04033a9aeb2dcce/zope.interface-7.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80a3c00b35f6170be5454b45abe2719ea65919a2f09e8a6e7b1362312a872cd3", size = 254254 }, - { url = 
"https://files.pythonhosted.org/packages/4f/cf/a5cb40b19f52c100d0ce22797f63ac865ced81fbf3a75a7ae0ecf2c45810/zope.interface-7.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b936d61dbe29572fd2cfe13e30b925e5383bed1aba867692670f5a2a2eb7b4e9", size = 211705 }, - { url = "https://files.pythonhosted.org/packages/9a/0b/c9dd45c073109fcaa63d5e167cae9e364fcb25f3626350127258a678ff0f/zope.interface-7.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ac20581fc6cd7c754f6dff0ae06fedb060fa0e9ea6309d8be8b2701d9ea51c4", size = 208524 }, - { url = "https://files.pythonhosted.org/packages/e0/34/57afb328bcced4d0472c11cfab5581cc1e6bb91adf1bb87509a4f5690755/zope.interface-7.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:848b6fa92d7c8143646e64124ed46818a0049a24ecc517958c520081fd147685", size = 209032 }, - { url = "https://files.pythonhosted.org/packages/e9/a4/b2e4900f6d4a572979b5e8aa95f1ff9296b458978537f51ba546da51c108/zope.interface-7.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec1ef1fdb6f014d5886b97e52b16d0f852364f447d2ab0f0c6027765777b6667", size = 261251 }, - { url = "https://files.pythonhosted.org/packages/c3/89/2cd0a6b24819c024b340fa67f0dda65d0ac8bbd81f35a1fa7c468b681d55/zope.interface-7.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bcff5c09d0215f42ba64b49205a278e44413d9bf9fa688fd9e42bfe472b5f4f", size = 255366 }, - { url = "https://files.pythonhosted.org/packages/9e/00/e58be3067025ffbeed48094a07c1972d8150f6d628151fde66f16fa0d4ae/zope.interface-7.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07add15de0cc7e69917f7d286b64d54125c950aeb43efed7a5ea7172f000fbc1", size = 260078 }, - { url = "https://files.pythonhosted.org/packages/d1/b6/56436f9f6b74c13c9cd3dbd8345f47823d72b7c9ba2b39872cb7bee4cf42/zope.interface-7.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:9940d5bc441f887c5f375ec62bcf7e7e495a2d5b1da97de1184a88fb567f06af", size = 212092 }, - { url = "https://files.pythonhosted.org/packages/ee/d7/0ab8291230cf4fa05fa6f7bb26e0206d799a922070bc3a102f88133edc1e/zope.interface-7.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f245d039f72e6f802902375755846f5de1ee1e14c3e8736c078565599bcab621", size = 208649 }, - { url = "https://files.pythonhosted.org/packages/4e/ce/598d623faeca8a7ccb120a7d94f707efb61d21a57324a905c9a2bdb7b4b9/zope.interface-7.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6159e767d224d8f18deff634a1d3722e68d27488c357f62ebeb5f3e2f5288b1f", size = 209053 }, - { url = "https://files.pythonhosted.org/packages/ea/d0/c88caffdf6cf99e9b5d1fad9bdfa94d9eee21f72c2f9f4768bced100aab7/zope.interface-7.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e956b1fd7f3448dd5e00f273072e73e50dfafcb35e4227e6d5af208075593c9", size = 266506 }, - { url = "https://files.pythonhosted.org/packages/1d/bd/2b665bb66b18169828f0e3d0865eabdb3c8f59556db90367950edccfc072/zope.interface-7.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff115ef91c0eeac69cd92daeba36a9d8e14daee445b504eeea2b1c0b55821984", size = 261229 }, - { url = "https://files.pythonhosted.org/packages/04/a0/9a0595057002784395990b5e5a5e84e71905f5c110ea5ecae469dc831468/zope.interface-7.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec001798ab62c3fc5447162bf48496ae9fba02edc295a9e10a0b0c639a6452e", size = 267167 
}, - { url = "https://files.pythonhosted.org/packages/fb/64/cf1a22aad65dc9746fdc6705042c066011e3fe80f9c73aea9a53b0b3642d/zope.interface-7.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:124149e2d42067b9c6597f4dafdc7a0983d0163868f897b7bb5dc850b14f9a87", size = 212207 }, - { url = "https://files.pythonhosted.org/packages/43/39/75d4e59474ec7aeb8eebb01fae88e97ee8b0b3144d7a445679f000001977/zope.interface-7.1.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:9733a9a0f94ef53d7aa64661811b20875b5bc6039034c6e42fb9732170130573", size = 208650 }, - { url = "https://files.pythonhosted.org/packages/c9/24/929b5530508a39a842fe50e159681b3dd36800604252940662268c3a8551/zope.interface-7.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5fcf379b875c610b5a41bc8a891841533f98de0520287d7f85e25386cd10d3e9", size = 209057 }, - { url = "https://files.pythonhosted.org/packages/fa/a3/07c120b40d47a3b28faadbacea579db8d7dc9214c909da13d72fd55395f7/zope.interface-7.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0a45b5af9f72c805ee668d1479480ca85169312211bed6ed18c343e39307d5f", size = 266466 }, - { url = "https://files.pythonhosted.org/packages/4f/fa/e1925c8737787887a2801a45aadbc1ca8367fd9f135e721a2ce5a020e14d/zope.interface-7.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4af4a12b459a273b0b34679a5c3dc5e34c1847c3dd14a628aa0668e19e638ea2", size = 261220 }, - { url = "https://files.pythonhosted.org/packages/d5/79/d7828b915edf77f8f7849e0ab4380084d07c3d09ef86f9763f1490661d66/zope.interface-7.1.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a735f82d2e3ed47ca01a20dfc4c779b966b16352650a8036ab3955aad151ed8a", size = 267157 }, - { url = "https://files.pythonhosted.org/packages/98/ac/012f18dc9b35e8547975f6e0512bcb6a1e97901d7a5e4e4cb5899dee6304/zope.interface-7.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:5501e772aff595e3c54266bc1bfc5858e8f38974ce413a8f1044aae0f32a83a3", size = 212213 }, + { url = "https://files.pythonhosted.org/packages/fe/42/f8dbc2b9ad59e927940325a22d6d3931d630c3644dae7e2369ef5d9ba230/zope.event-5.0-py3-none-any.whl", hash = "sha256:2832e95014f4db26c47a13fdaef84cef2f4df37e66b59d8f1f4a8f319a632c26", size = 6824 }, +] + +[[package]] +name = "zope-interface" +version = "7.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3c/f5/1079cab32302359cc09bd1dca9656e680601e0e8af9397322ab0fe85f368/zope.interface-7.1.1.tar.gz", hash = "sha256:4284d664ef0ff7b709836d4de7b13d80873dc5faeffc073abdb280058bfac5e3", size = 253129 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/41/328372febe88b50cb1c77d99fd3ee8e628fb125bd26b38b5351f8b9bdcbb/zope.interface-7.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6650bd56ef350d37c8baccfd3ee8a0483ed6f8666e641e4b9ae1a1827b79f9e5", size = 208001 }, + { url = "https://files.pythonhosted.org/packages/22/06/ced7336eeabba528a39803ccdf52200daa4e7b73d74feac52677f7c83a72/zope.interface-7.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84e87eba6b77a3af187bae82d8de1a7c208c2a04ec9f6bd444fd091b811ad92e", size = 208518 }, + { url = "https://files.pythonhosted.org/packages/9a/c9/3a63c758a68739080d8c343dda2fca4d214096ed97ce56b875086b309dd2/zope.interface-7.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:1c4e1b4c06d9abd1037c088dae1566c85f344a3e6ae4350744c3f7f7259d9c67", size = 254689 }, + { url = "https://files.pythonhosted.org/packages/9a/59/d8c59cfb16b3f086c868d0c531892c3914acbbb324005f0e5c640855a596/zope.interface-7.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cd5e3d910ac87652a09f6e5db8e41bc3b49cf08ddd2d73d30afc644801492cd", size = 249133 }, + { url = "https://files.pythonhosted.org/packages/9a/6e/449acdd6530cbb9c224be3e59b032d8fc6db35ea8b398aaabcaee50f3881/zope.interface-7.1.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca95594d936ee349620900be5b46c0122a1ff6ce42d7d5cb2cf09dc84071ef16", size = 254250 }, + { url = "https://files.pythonhosted.org/packages/76/cb/8a13047ae686ca0a478cbf9043132acdcc8ccf71cfa0af287de235fd54f4/zope.interface-7.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:ad339509dcfbbc99bf8e147db6686249c4032f26586699ec4c82f6e5909c9fe2", size = 211708 }, + { url = "https://files.pythonhosted.org/packages/cc/9e/a53e0b252dca6f4858765efd4287239542e3018efe403ccf4f4947b1f6a8/zope.interface-7.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e59f175e868f856a77c0a77ba001385c377df2104fdbda6b9f99456a01e102a", size = 208535 }, + { url = "https://files.pythonhosted.org/packages/4a/2c/19bb3ead6133fe457e833af67cc8ce497f54bfd90f5ac532af6e4892acb2/zope.interface-7.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0de23bcb93401994ea00bc5c677ef06d420340ac0a4e9c10d80e047b9ce5af3f", size = 209053 }, + { url = "https://files.pythonhosted.org/packages/18/3f/3b341ed342f594f3b9e3fc48acecd929d118ee1ea6e415cedfebc2b78214/zope.interface-7.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cdb7e7e5524b76d3ec037c1d81a9e2c7457b240fd4cb0a2476b65c3a5a6c81f", size = 260764 }, + { url = "https://files.pythonhosted.org/packages/65/2a/bb8f72d938cf4edf7e40cbdf14477242a3753205c4f537dafdfbb33249e5/zope.interface-7.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3603ef82a9920bd0bfb505423cb7e937498ad971ad5a6141841e8f76d2fd5446", size = 254805 }, + { url = "https://files.pythonhosted.org/packages/b1/60/abc01b59a41762cf785be8e997a7301e3cb93d19e066a35f10fb31ac0277/zope.interface-7.1.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1d52d052355e0c5c89e0630dd2ff7c0b823fd5f56286a663e92444761b35e25", size = 259573 }, + { url = "https://files.pythonhosted.org/packages/19/50/52a20a6a9e7c605eabb87dcdd5823369d3096854c41b968f2d1e18a8ae8f/zope.interface-7.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:179ad46ece518c9084cb272e4a69d266b659f7f8f48e51706746c2d8a426433e", size = 212067 }, + { url = "https://files.pythonhosted.org/packages/0f/fe/52bd130dd3f8b88868e741cf9bfeea4367e13d3f84933746f4ba01c85e6b/zope.interface-7.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e6503534b52bb1720ace9366ee30838a58a3413d3e197512f3338c8f34b5d89d", size = 208716 }, + { url = "https://files.pythonhosted.org/packages/8b/a9/51fe239b07f69384e77568ca3098c518926204eb1fdc7cdcc154c0c78521/zope.interface-7.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f85b290e5b8b11814efb0d004d8ce6c9a483c35c462e8d9bf84abb93e79fa770", size = 209115 }, + { url = 
"https://files.pythonhosted.org/packages/f0/fe/33f1f1e68d54c9563db436596a648e57c9dfc298dc0525d348cdb5e812d0/zope.interface-7.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d029fac6a80edae80f79c37e5e3abfa92968fe921886139b3ee470a1b177321a", size = 264001 }, + { url = "https://files.pythonhosted.org/packages/2e/7f/4d6dafc4debe955a72dd33f8cae1d2e522d43b42167ee8735fd0fe36961e/zope.interface-7.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5836b8fb044c6e75ba34dfaabc602493019eadfa0faf6ff25f4c4c356a71a853", size = 259018 }, + { url = "https://files.pythonhosted.org/packages/7d/3f/3180bbd9937a2889a67ad2515e56869e0cdb1f47a1f0da52dc1065c81ff8/zope.interface-7.1.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7395f13533318f150ee72adb55b29284b16e73b6d5f02ab21f173b3e83f242b8", size = 264470 }, + { url = "https://files.pythonhosted.org/packages/95/b8/46a52bfec80089d7e687c1e4471c5918e3a60c2dfff63d3e5588e4bd6656/zope.interface-7.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:1d0e23c6b746eb8ce04573cc47bcac60961ac138885d207bd6f57e27a1431ae8", size = 212226 }, + { url = "https://files.pythonhosted.org/packages/7e/78/60fb41f6fca56f90a107244e28768deac8697de8cc0f7c8469725c9949ad/zope.interface-7.1.1-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:9fad9bd5502221ab179f13ea251cb30eef7cf65023156967f86673aff54b53a0", size = 208720 }, + { url = "https://files.pythonhosted.org/packages/a5/4b/9152d924be141a1b52700ec0bb5c9a28795f67f4253dadb7f4c0c6d63675/zope.interface-7.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:55c373becbd36a44d0c9be1d5271422fdaa8562d158fb44b4192297b3c67096c", size = 209114 }, + { url = "https://files.pythonhosted.org/packages/00/cc/23d6d94db158b31b82e92202d3e8938d5e5cb38e3141af823a34bd8ae511/zope.interface-7.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed1df8cc01dd1e3970666a7370b8bfc7457371c58ba88c57bd5bca17ab198053", size = 263960 }, + { url = "https://files.pythonhosted.org/packages/e7/d6/acd466c950688ed8964ade5f9c5f2c035a52b44f18f19a6d79d3de48a255/zope.interface-7.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99c14f0727c978639139e6cad7a60e82b7720922678d75aacb90cf4ef74a068c", size = 259004 }, + { url = "https://files.pythonhosted.org/packages/71/31/44b746ed39134fa9c28262dc8ff9821c6b6f4df5a9edc1e599219d16cb79/zope.interface-7.1.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b1eed7670d564f1025d7cda89f99f216c30210e42e95de466135be0b4a499d9", size = 264463 }, + { url = "https://files.pythonhosted.org/packages/5a/e1/30fb5f7e587e14a57c8f41413cb76eecbcfd878ef105eb908d2d2e648b73/zope.interface-7.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:3defc925c4b22ac1272d544a49c6ba04c3eefcce3200319ee1be03d9270306dd", size = 212236 }, ] From b3c1cb716ef238d1e8e2132c2c816c9f0e30f381 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 04:07:35 -0700 Subject: [PATCH 05/25] move abx plugins inside vendor dir --- archivebox/__init__.py | 46 +- archivebox/config/__init__.py | 30 +- .../config/{configfile.py => collection.py} | 18 +- archivebox/config/common.py | 11 +- archivebox/config/django.py | 2 +- archivebox/config/views.py | 28 +- archivebox/core/__init__.py | 29 ++ archivebox/core/admin_site.py | 4 +- archivebox/core/apps.py | 13 +- 
archivebox/core/settings.py | 25 +- archivebox/core/settings_logging.py | 5 - archivebox/core/views.py | 38 +- archivebox/extractors/__init__.py | 46 +- archivebox/index/html.py | 11 +- archivebox/index/json.py | 4 +- archivebox/index/schema.py | 9 +- archivebox/machine/models.py | 29 +- archivebox/main.py | 60 ++- archivebox/misc/checks.py | 3 + archivebox/misc/shell_welcome_message.py | 3 +- archivebox/parsers/pocket_api.py | 11 +- archivebox/parsers/readwise_reader_api.py | 25 +- archivebox/search/__init__.py | 6 +- archivebox/vendor/__init__.py | 28 +- .../abx-plugin-archivedotorg}/README.md | 0 .../abx_plugin_archivedotorg/__init__.py | 21 + .../abx_plugin_archivedotorg}/archive_org.py | 0 .../abx_plugin_archivedotorg}/config.py | 5 +- .../abx-plugin-archivedotorg/pyproject.toml | 18 + .../vendor/abx-plugin-chrome}/README.md | 0 .../abx_plugin_chrome/__init__.py | 34 ++ .../abx_plugin_chrome}/binaries.py | 15 +- .../abx_plugin_chrome}/config.py | 6 +- .../abx_plugin_chrome}/dom.py | 0 .../abx_plugin_chrome}/pdf.py | 0 .../abx_plugin_chrome}/screenshot.py | 0 .../vendor/abx-plugin-chrome/pyproject.toml | 18 + .../vendor/abx-plugin-curl}/README.md | 0 .../abx_plugin_curl/__init__.py | 18 + .../abx_plugin_curl}/binaries.py | 8 +- .../abx_plugin_curl}/config.py | 4 +- .../abx_plugin_curl}/headers.py | 0 .../vendor/abx-plugin-curl/pyproject.toml | 18 + .../abx-plugin-default-binproviders/README.md | 0 .../abx_plugin_default_binproviders.py | 1 - .../pyproject.toml | 0 .../vendor/abx-plugin-favicon}/README.md | 0 .../abx_plugin_favicon/__init__.py | 29 ++ .../abx_plugin_favicon}/config.py | 5 +- .../abx_plugin_favicon}/favicon.py | 0 .../vendor/abx-plugin-favicon/pyproject.toml | 18 + .../vendor/abx-plugin-git}/README.md | 0 .../abx-plugin-git/abx_plugin_git/__init__.py | 29 ++ .../abx_plugin_git}/binaries.py | 8 +- .../abx-plugin-git/abx_plugin_git}/config.py | 4 +- .../abx_plugin_git/extractors.py | 15 + .../abx-plugin-git/abx_plugin_git}/git.py | 4 +- .../vendor/abx-plugin-git/pyproject.toml | 19 + .../vendor/abx-plugin-htmltotext}/README.md | 0 .../abx_plugin_htmltotext/__init__.py | 22 + .../abx_plugin_htmltotext}/config.py | 5 +- .../abx_plugin_htmltotext}/htmltotext.py | 0 .../abx-plugin-htmltotext/pyproject.toml | 17 + .../vendor}/abx-plugin-ldap-auth/README.md | 0 .../abx_plugin_ldap_auth}/__init__.py | 33 +- .../abx_plugin_ldap_auth}/binaries.py | 15 +- .../abx_plugin_ldap_auth}/config.py | 6 +- .../abx-plugin-ldap-auth/pyproject.toml | 20 + .../vendor/abx-plugin-mercury}/README.md | 0 .../abx_plugin_mercury/__init__.py | 29 ++ .../abx_plugin_mercury}/binaries.py | 10 +- .../abx_plugin_mercury}/config.py | 4 +- .../abx_plugin_mercury/extractors.py | 17 + .../abx_plugin_mercury}/mercury.py | 0 .../vendor/abx-plugin-mercury/pyproject.toml | 17 + .../vendor/abx-plugin-npm}/README.md | 0 .../abx_plugin_npm}/__init__.py | 5 +- .../abx_plugin_npm}/binaries.py | 0 .../abx_plugin_npm}/binproviders.py | 3 +- .../abx-plugin-npm/abx_plugin_npm}/config.py | 0 .../vendor/abx-plugin-npm}/pyproject.toml | 4 +- .../vendor/abx-plugin-pip}/README.md | 0 .../abx_plugin_pip}/.plugin_order | 0 .../abx_plugin_pip}/__init__.py | 5 +- .../abx_plugin_pip}/binaries.py | 2 +- .../abx_plugin_pip}/binproviders.py | 2 +- .../abx-plugin-pip/abx_plugin_pip}/config.py | 2 +- .../vendor/abx-plugin-pip}/pyproject.toml | 4 +- .../vendor/abx-plugin-playwright}/README.md | 0 .../abx_plugin_playwright}/__init__.py | 6 +- .../abx_plugin_playwright}/binaries.py | 4 +- .../abx_plugin_playwright}/binproviders.py | 4 +- 
.../abx_plugin_playwright}/config.py | 0 .../abx-plugin-playwright}/pyproject.toml | 6 +- .../vendor/abx-plugin-pocket}/README.md | 0 .../abx_plugin_pocket/__init__.py | 18 + .../abx_plugin_pocket}/config.py | 4 +- .../vendor/abx-plugin-pocket/pyproject.toml | 18 + .../vendor/abx-plugin-puppeteer}/README.md | 0 .../abx_plugin_puppeteer/__init__.py | 30 ++ .../abx_plugin_puppeteer}/binaries.py | 10 +- .../abx_plugin_puppeteer}/binproviders.py | 18 +- .../abx_plugin_puppeteer}/config.py | 4 +- .../abx-plugin-puppeteer/pyproject.toml | 19 + .../vendor/abx-plugin-readability}/README.md | 0 .../abx_plugin_readability/__init__.py | 30 ++ .../abx_plugin_readability}/binaries.py | 11 +- .../abx_plugin_readability}/config.py | 4 +- .../abx_plugin_readability/extractors.py | 19 + .../abx_plugin_readability}/readability.py | 0 .../abx-plugin-readability/pyproject.toml | 17 + .../vendor/abx-plugin-readwise}/README.md | 0 .../abx_plugin_readwise.py | 35 ++ .../vendor/abx-plugin-readwise/pyproject.toml | 18 + .../abx-plugin-ripgrep-search/README.md | 0 .../abx_plugin_ripgrep_search/__init__.py | 31 ++ .../abx_plugin_ripgrep_search}/binaries.py | 8 +- .../abx_plugin_ripgrep_search}/config.py | 4 +- .../searchbackend.py | 4 +- .../abx-plugin-ripgrep-search/pyproject.toml | 18 + .../vendor/abx-plugin-singlefile}/README.md | 0 .../abx_plugin_singlefile}/__init__.py | 22 +- .../abx_plugin_singlefile}/binaries.py | 11 +- .../abx_plugin_singlefile}/config.py | 4 +- .../abx_plugin_singlefile/extractors.py | 18 + .../abx_plugin_singlefile}/models.py | 0 .../abx_plugin_singlefile}/singlefile.py | 0 .../abx-plugin-singlefile/pyproject.toml | 19 + .../vendor}/abx-plugin-sonic-search/README.md | 0 .../abx_plugin_sonic_search}/__init__.py | 22 +- .../abx_plugin_sonic_search}/binaries.py | 8 +- .../abx_plugin_sonic_search}/config.py | 4 +- .../abx_plugin_sonic_search}/searchbackend.py | 2 +- .../abx-plugin-sonic-search/pyproject.toml | 20 + .../abx-plugin-sqlitefts-search/README.md | 0 .../abx_plugin_sqlitefts_search/__init__.py | 21 + .../abx_plugin_sqlitefts_search}/config.py | 4 +- .../searchbackend.py | 4 +- .../pyproject.toml | 18 + .../vendor/abx-plugin-title}/README.md | 0 .../abx_plugin_title/__init__.py | 9 + .../abx_plugin_title/extractor.py} | 4 +- .../vendor/abx-plugin-title/pyproject.toml | 18 + .../vendor/abx-plugin-wget}/README.md | 0 .../abx_plugin_wget/__init__.py | 35 ++ .../abx_plugin_wget}/binaries.py | 8 +- .../abx_plugin_wget}/config.py | 4 +- .../abx_plugin_wget/extractors.py | 35 ++ .../abx-plugin-wget/abx_plugin_wget}/wget.py | 11 +- .../abx_plugin_wget}/wget_util.py | 0 .../vendor/abx-plugin-wget/pyproject.toml | 18 + .../vendor/abx-plugin-ytdlp}/README.md | 0 .../abx_plugin_ytdlp}/__init__.py | 19 +- .../abx_plugin_ytdlp}/binaries.py | 13 +- .../abx_plugin_ytdlp}/config.py | 2 +- .../abx_plugin_ytdlp}/media.py | 0 .../vendor/abx-plugin-ytdlp/pyproject.toml | 19 + .../vendor/abx-spec-archivebox}/README.md | 0 .../abx_spec_archivebox/__init__.py | 28 + .../abx_spec_archivebox/effects.py | 0 .../abx_spec_archivebox/events.py | 0 .../abx_spec_archivebox/reads.py | 0 .../abx_spec_archivebox/states.py | 0 .../abx_spec_archivebox/writes.py | 1 - .../abx-spec-archivebox/pyproject.toml | 0 .../vendor/abx-spec-config}/README.md | 0 .../abx_spec_config/__init__.py | 66 +++ .../abx_spec_config/base_configset.py | 0 .../abx_spec_config/toml_util.py | 0 .../vendor}/abx-spec-config/pyproject.toml | 5 +- .../vendor/abx-spec-django}/README.md | 0 .../vendor/abx-spec-django/abx_spec_django.py | 118 +++++ 
.../vendor}/abx-spec-django/pyproject.toml | 0 .../vendor/abx-spec-extractor}/README.md | 0 .../abx-spec-extractor/abx_spec_extractor.py | 0 .../vendor}/abx-spec-extractor/pyproject.toml | 0 .../vendor/abx-spec-pydantic-pkgr}/README.md | 0 .../abx_spec_pydantic_pkgr.py | 114 +++++ .../abx-spec-pydantic-pkgr/pyproject.toml | 0 .../vendor/abx-spec-searchbackend/README.md | 0 .../abx_spec_searchbackend.py | 40 ++ .../abx-spec-searchbackend/pyproject.toml | 0 archivebox/vendor/abx/README.md | 0 archivebox/vendor/abx/abx.py | 483 ++++++++++++++++++ .../vendor}/abx/pyproject.toml | 0 archivebox/vendor/pocket | 1 + {packages => archivebox/vendor}/pydantic-pkgr | 0 .../__init__.py | 39 -- .../pyproject.toml | 7 - .../abx-plugin-chrome-extractor/__init__.py | 54 -- .../pyproject.toml | 7 - .../abx-plugin-curl-extractor/__init__.py | 38 -- .../abx-plugin-curl-extractor/pyproject.toml | 7 - .../abx-plugin-favicon-extractor/__init__.py | 39 -- .../pyproject.toml | 7 - packages/abx-plugin-git-extractor/__init__.py | 46 -- .../abx-plugin-git-extractor/extractors.py | 17 - .../abx-plugin-git-extractor/pyproject.toml | 7 - .../__init__.py | 41 -- .../pyproject.toml | 7 - packages/abx-plugin-ldap-auth/pyproject.toml | 22 - .../abx-plugin-mercury-extractor/__init__.py | 46 -- .../extractors.py | 19 - .../pyproject.toml | 7 - .../abx-plugin-pocket-extractor/__init__.py | 37 -- .../pyproject.toml | 7 - .../__init__.py | 46 -- .../pyproject.toml | 7 - .../__init__.py | 46 -- .../extractors.py | 20 - .../pyproject.toml | 7 - .../abx-plugin-readwise-extractor/__init__.py | 37 -- .../abx-plugin-readwise-extractor/config.py | 17 - .../pyproject.toml | 7 - .../abx-plugin-ripgrep-search/__init__.py | 48 -- .../abx-plugin-ripgrep-search/pyproject.toml | 7 - .../extractors.py | 19 - .../pyproject.toml | 7 - .../abx-plugin-sonic-search/pyproject.toml | 7 - .../abx-plugin-sqlitefts-search/__init__.py | 39 -- .../pyproject.toml | 7 - .../abx-plugin-wget-extractor/__init__.py | 54 -- .../abx-plugin-wget-extractor/extractors.py | 37 -- .../abx-plugin-wget-extractor/pyproject.toml | 7 - .../abx-plugin-ytdlp-extractor/pyproject.toml | 7 - .../abx_spec_archivebox/__init__.py | 7 - .../abx_spec_config/__init__.py | 50 -- .../abx_spec_django/__init__.py | 140 ----- .../abx-spec-django/abx_spec_django/apps.py | 14 - .../abx_spec_pydantic_pkgr.py | 72 --- .../abx_spec_searchbackend.py | 29 -- packages/abx/abx.py | 344 ------------- .../archivebox-pocket/.circleci/config.yml | 61 --- packages/archivebox-pocket/.gitignore | 43 -- packages/archivebox-pocket/LICENSE.md | 27 - packages/archivebox-pocket/MANIFEST.in | 2 - packages/archivebox-pocket/README.md | 66 --- packages/archivebox-pocket/pocket.py | 366 ------------- packages/archivebox-pocket/pyproject.toml | 19 - packages/archivebox-pocket/requirements.txt | 4 - packages/archivebox-pocket/setup.py | 41 -- packages/archivebox-pocket/test_pocket.py | 52 -- pyproject.toml | 83 ++- 243 files changed, 2162 insertions(+), 2709 deletions(-) rename archivebox/config/{configfile.py => collection.py} (94%) rename {packages/abx-plugin-archivedotorg-extractor => archivebox/vendor/abx-plugin-archivedotorg}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py rename archivebox/{extractors => vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg}/archive_org.py (100%) rename {packages/abx-plugin-archivedotorg-extractor => archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg}/config.py (54%) create mode 100644 
archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml rename {packages/abx-plugin-chrome-extractor => archivebox/vendor/abx-plugin-chrome}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py rename {packages/abx-plugin-chrome-extractor => archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome}/binaries.py (92%) rename {packages/abx-plugin-chrome-extractor => archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome}/config.py (98%) rename archivebox/{extractors => vendor/abx-plugin-chrome/abx_plugin_chrome}/dom.py (100%) rename archivebox/{extractors => vendor/abx-plugin-chrome/abx_plugin_chrome}/pdf.py (100%) rename archivebox/{extractors => vendor/abx-plugin-chrome/abx_plugin_chrome}/screenshot.py (100%) create mode 100644 archivebox/vendor/abx-plugin-chrome/pyproject.toml rename {packages/abx-plugin-curl-extractor => archivebox/vendor/abx-plugin-curl}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py rename {packages/abx-plugin-curl-extractor => archivebox/vendor/abx-plugin-curl/abx_plugin_curl}/binaries.py (57%) rename {packages/abx-plugin-curl-extractor => archivebox/vendor/abx-plugin-curl/abx_plugin_curl}/config.py (90%) rename archivebox/{extractors => vendor/abx-plugin-curl/abx_plugin_curl}/headers.py (100%) create mode 100644 archivebox/vendor/abx-plugin-curl/pyproject.toml rename {packages => archivebox/vendor}/abx-plugin-default-binproviders/README.md (100%) rename {packages => archivebox/vendor}/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py (99%) rename {packages => archivebox/vendor}/abx-plugin-default-binproviders/pyproject.toml (100%) rename {packages/abx-plugin-favicon-extractor => archivebox/vendor/abx-plugin-favicon}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py rename {packages/abx-plugin-favicon-extractor => archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon}/config.py (64%) rename archivebox/{extractors => vendor/abx-plugin-favicon/abx_plugin_favicon}/favicon.py (100%) create mode 100644 archivebox/vendor/abx-plugin-favicon/pyproject.toml rename {packages/abx-plugin-git-extractor => archivebox/vendor/abx-plugin-git}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py rename {packages/abx-plugin-git-extractor => archivebox/vendor/abx-plugin-git/abx_plugin_git}/binaries.py (57%) rename {packages/abx-plugin-git-extractor => archivebox/vendor/abx-plugin-git/abx_plugin_git}/config.py (87%) create mode 100644 archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py rename archivebox/{extractors => vendor/abx-plugin-git/abx_plugin_git}/git.py (95%) create mode 100644 archivebox/vendor/abx-plugin-git/pyproject.toml rename {packages/abx-plugin-htmltotext-extractor => archivebox/vendor/abx-plugin-htmltotext}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py rename {packages/abx-plugin-htmltotext-extractor => archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext}/config.py (52%) rename archivebox/{extractors => vendor/abx-plugin-htmltotext/abx_plugin_htmltotext}/htmltotext.py (100%) create mode 100644 archivebox/vendor/abx-plugin-htmltotext/pyproject.toml rename {packages => archivebox/vendor}/abx-plugin-ldap-auth/README.md (100%) rename {packages/abx-plugin-ldap-auth => archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth}/__init__.py (68%) rename {packages/abx-plugin-ldap-auth 
=> archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth}/binaries.py (78%) rename {packages/abx-plugin-ldap-auth => archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth}/config.py (96%) create mode 100644 archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml rename {packages/abx-plugin-mercury-extractor => archivebox/vendor/abx-plugin-mercury}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py rename {packages/abx-plugin-mercury-extractor => archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury}/binaries.py (78%) rename {packages/abx-plugin-mercury-extractor => archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury}/config.py (90%) create mode 100644 archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py rename archivebox/{extractors => vendor/abx-plugin-mercury/abx_plugin_mercury}/mercury.py (100%) create mode 100644 archivebox/vendor/abx-plugin-mercury/pyproject.toml rename {packages/abx-plugin-npm-binprovider => archivebox/vendor/abx-plugin-npm}/README.md (100%) rename {packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider => archivebox/vendor/abx-plugin-npm/abx_plugin_npm}/__init__.py (86%) rename {packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider => archivebox/vendor/abx-plugin-npm/abx_plugin_npm}/binaries.py (100%) rename {packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider => archivebox/vendor/abx-plugin-npm/abx_plugin_npm}/binproviders.py (94%) rename {packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider => archivebox/vendor/abx-plugin-npm/abx_plugin_npm}/config.py (100%) rename {packages/abx-plugin-npm-binprovider => archivebox/vendor/abx-plugin-npm}/pyproject.toml (81%) rename {packages/abx-plugin-pip-binprovider => archivebox/vendor/abx-plugin-pip}/README.md (100%) rename {packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider => archivebox/vendor/abx-plugin-pip/abx_plugin_pip}/.plugin_order (100%) rename {packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider => archivebox/vendor/abx-plugin-pip/abx_plugin_pip}/__init__.py (90%) rename {packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider => archivebox/vendor/abx-plugin-pip/abx_plugin_pip}/binaries.py (99%) rename {packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider => archivebox/vendor/abx-plugin-pip/abx_plugin_pip}/binproviders.py (98%) rename {packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider => archivebox/vendor/abx-plugin-pip/abx_plugin_pip}/config.py (86%) rename {packages/abx-plugin-pip-binprovider => archivebox/vendor/abx-plugin-pip}/pyproject.toml (82%) rename {packages/abx-plugin-playwright-binprovider => archivebox/vendor/abx-plugin-playwright}/README.md (100%) rename {packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider => archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright}/__init__.py (78%) rename {packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider => archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright}/binaries.py (73%) rename {packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider => archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright}/binproviders.py (98%) rename {packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider => archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright}/config.py (100%) rename {packages/abx-plugin-playwright-binprovider => archivebox/vendor/abx-plugin-playwright}/pyproject.toml (72%) 
rename {packages/abx-plugin-pocket-extractor => archivebox/vendor/abx-plugin-pocket}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py rename {packages/abx-plugin-pocket-extractor => archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket}/config.py (76%) create mode 100644 archivebox/vendor/abx-plugin-pocket/pyproject.toml rename {packages/abx-plugin-puppeteer-binprovider => archivebox/vendor/abx-plugin-puppeteer}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py rename {packages/abx-plugin-puppeteer-binprovider => archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer}/binaries.py (54%) rename {packages/abx-plugin-puppeteer-binprovider => archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer}/binproviders.py (93%) rename {packages/abx-plugin-puppeteer-binprovider => archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer}/config.py (79%) create mode 100644 archivebox/vendor/abx-plugin-puppeteer/pyproject.toml rename {packages/abx-plugin-readability-extractor => archivebox/vendor/abx-plugin-readability}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py rename {packages/abx-plugin-readability-extractor => archivebox/vendor/abx-plugin-readability/abx_plugin_readability}/binaries.py (69%) rename {packages/abx-plugin-readability-extractor => archivebox/vendor/abx-plugin-readability/abx_plugin_readability}/config.py (83%) create mode 100644 archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py rename archivebox/{extractors => vendor/abx-plugin-readability/abx_plugin_readability}/readability.py (100%) create mode 100644 archivebox/vendor/abx-plugin-readability/pyproject.toml rename {packages/abx-plugin-readwise-extractor => archivebox/vendor/abx-plugin-readwise}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py create mode 100644 archivebox/vendor/abx-plugin-readwise/pyproject.toml rename {packages => archivebox/vendor}/abx-plugin-ripgrep-search/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py rename {packages/abx-plugin-ripgrep-search => archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search}/binaries.py (65%) rename {packages/abx-plugin-ripgrep-search => archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search}/config.py (89%) rename {packages/abx-plugin-ripgrep-search => archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search}/searchbackend.py (93%) create mode 100644 archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml rename {packages/abx-plugin-singlefile-extractor => archivebox/vendor/abx-plugin-singlefile}/README.md (100%) rename {packages/abx-plugin-singlefile-extractor => archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile}/__init__.py (53%) rename {packages/abx-plugin-singlefile-extractor => archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile}/binaries.py (84%) rename {packages/abx-plugin-singlefile-extractor => archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile}/config.py (88%) create mode 100644 archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py rename {packages/abx-plugin-singlefile-extractor => archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile}/models.py (100%) rename archivebox/{extractors => 
vendor/abx-plugin-singlefile/abx_plugin_singlefile}/singlefile.py (100%) create mode 100644 archivebox/vendor/abx-plugin-singlefile/pyproject.toml rename {packages => archivebox/vendor}/abx-plugin-sonic-search/README.md (100%) rename {packages/abx-plugin-sonic-search => archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search}/__init__.py (53%) rename {packages/abx-plugin-sonic-search => archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search}/binaries.py (80%) rename {packages/abx-plugin-sonic-search => archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search}/config.py (93%) rename {packages/abx-plugin-sonic-search => archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search}/searchbackend.py (97%) create mode 100644 archivebox/vendor/abx-plugin-sonic-search/pyproject.toml rename {packages => archivebox/vendor}/abx-plugin-sqlitefts-search/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py rename {packages/abx-plugin-sqlitefts-search => archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search}/config.py (96%) rename {packages/abx-plugin-sqlitefts-search => archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search}/searchbackend.py (98%) create mode 100644 archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml rename {packages/abx-plugin-wget-extractor => archivebox/vendor/abx-plugin-title}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py rename archivebox/{extractors/title.py => vendor/abx-plugin-title/abx_plugin_title/extractor.py} (97%) create mode 100644 archivebox/vendor/abx-plugin-title/pyproject.toml rename {packages/abx-plugin-ytdlp-extractor => archivebox/vendor/abx-plugin-wget}/README.md (100%) create mode 100644 archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py rename {packages/abx-plugin-wget-extractor => archivebox/vendor/abx-plugin-wget/abx_plugin_wget}/binaries.py (57%) rename {packages/abx-plugin-wget-extractor => archivebox/vendor/abx-plugin-wget/abx_plugin_wget}/config.py (96%) create mode 100644 archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py rename archivebox/{extractors => vendor/abx-plugin-wget/abx_plugin_wget}/wget.py (97%) rename {packages/abx-plugin-wget-extractor => archivebox/vendor/abx-plugin-wget/abx_plugin_wget}/wget_util.py (100%) create mode 100644 archivebox/vendor/abx-plugin-wget/pyproject.toml rename {packages/abx-spec-archivebox => archivebox/vendor/abx-plugin-ytdlp}/README.md (100%) rename {packages/abx-plugin-ytdlp-extractor => archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp}/__init__.py (53%) rename {packages/abx-plugin-ytdlp-extractor => archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp}/binaries.py (77%) rename {packages/abx-plugin-ytdlp-extractor => archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp}/config.py (97%) rename archivebox/{extractors => vendor/abx-plugin-ytdlp/abx_plugin_ytdlp}/media.py (100%) create mode 100644 archivebox/vendor/abx-plugin-ytdlp/pyproject.toml rename {packages/abx-spec-django => archivebox/vendor/abx-spec-archivebox}/README.md (100%) create mode 100644 archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py rename {packages => archivebox/vendor}/abx-spec-archivebox/abx_spec_archivebox/effects.py (100%) rename {packages => archivebox/vendor}/abx-spec-archivebox/abx_spec_archivebox/events.py (100%) rename {packages => 
archivebox/vendor}/abx-spec-archivebox/abx_spec_archivebox/reads.py (100%) rename {packages => archivebox/vendor}/abx-spec-archivebox/abx_spec_archivebox/states.py (100%) rename {packages => archivebox/vendor}/abx-spec-archivebox/abx_spec_archivebox/writes.py (99%) rename {packages => archivebox/vendor}/abx-spec-archivebox/pyproject.toml (100%) rename {packages/abx-spec-extractor => archivebox/vendor/abx-spec-config}/README.md (100%) create mode 100644 archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py rename {packages => archivebox/vendor}/abx-spec-config/abx_spec_config/base_configset.py (100%) rename {packages => archivebox/vendor}/abx-spec-config/abx_spec_config/toml_util.py (100%) rename {packages => archivebox/vendor}/abx-spec-config/pyproject.toml (67%) rename {packages/abx-spec-pydantic-pkgr => archivebox/vendor/abx-spec-django}/README.md (100%) create mode 100644 archivebox/vendor/abx-spec-django/abx_spec_django.py rename {packages => archivebox/vendor}/abx-spec-django/pyproject.toml (100%) rename {packages/abx-spec-searchbackend => archivebox/vendor/abx-spec-extractor}/README.md (100%) rename {packages => archivebox/vendor}/abx-spec-extractor/abx_spec_extractor.py (100%) rename {packages => archivebox/vendor}/abx-spec-extractor/pyproject.toml (100%) rename {packages/abx => archivebox/vendor/abx-spec-pydantic-pkgr}/README.md (100%) create mode 100644 archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py rename {packages => archivebox/vendor}/abx-spec-pydantic-pkgr/pyproject.toml (100%) create mode 100644 archivebox/vendor/abx-spec-searchbackend/README.md create mode 100644 archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py rename {packages => archivebox/vendor}/abx-spec-searchbackend/pyproject.toml (100%) create mode 100644 archivebox/vendor/abx/README.md create mode 100644 archivebox/vendor/abx/abx.py rename {packages => archivebox/vendor}/abx/pyproject.toml (100%) create mode 160000 archivebox/vendor/pocket rename {packages => archivebox/vendor}/pydantic-pkgr (100%) delete mode 100644 packages/abx-plugin-archivedotorg-extractor/__init__.py delete mode 100644 packages/abx-plugin-archivedotorg-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-chrome-extractor/__init__.py delete mode 100644 packages/abx-plugin-chrome-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-curl-extractor/__init__.py delete mode 100644 packages/abx-plugin-curl-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-favicon-extractor/__init__.py delete mode 100644 packages/abx-plugin-favicon-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-git-extractor/__init__.py delete mode 100644 packages/abx-plugin-git-extractor/extractors.py delete mode 100644 packages/abx-plugin-git-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-htmltotext-extractor/__init__.py delete mode 100644 packages/abx-plugin-htmltotext-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-ldap-auth/pyproject.toml delete mode 100644 packages/abx-plugin-mercury-extractor/__init__.py delete mode 100644 packages/abx-plugin-mercury-extractor/extractors.py delete mode 100644 packages/abx-plugin-mercury-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-pocket-extractor/__init__.py delete mode 100644 packages/abx-plugin-pocket-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-puppeteer-binprovider/__init__.py delete mode 100644 packages/abx-plugin-puppeteer-binprovider/pyproject.toml delete mode 100644 
packages/abx-plugin-readability-extractor/__init__.py delete mode 100644 packages/abx-plugin-readability-extractor/extractors.py delete mode 100644 packages/abx-plugin-readability-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-readwise-extractor/__init__.py delete mode 100644 packages/abx-plugin-readwise-extractor/config.py delete mode 100644 packages/abx-plugin-readwise-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-ripgrep-search/__init__.py delete mode 100644 packages/abx-plugin-ripgrep-search/pyproject.toml delete mode 100644 packages/abx-plugin-singlefile-extractor/extractors.py delete mode 100644 packages/abx-plugin-singlefile-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-sonic-search/pyproject.toml delete mode 100644 packages/abx-plugin-sqlitefts-search/__init__.py delete mode 100644 packages/abx-plugin-sqlitefts-search/pyproject.toml delete mode 100644 packages/abx-plugin-wget-extractor/__init__.py delete mode 100644 packages/abx-plugin-wget-extractor/extractors.py delete mode 100644 packages/abx-plugin-wget-extractor/pyproject.toml delete mode 100644 packages/abx-plugin-ytdlp-extractor/pyproject.toml delete mode 100644 packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py delete mode 100644 packages/abx-spec-config/abx_spec_config/__init__.py delete mode 100644 packages/abx-spec-django/abx_spec_django/__init__.py delete mode 100644 packages/abx-spec-django/abx_spec_django/apps.py delete mode 100644 packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py delete mode 100644 packages/abx-spec-searchbackend/abx_spec_searchbackend.py delete mode 100644 packages/abx/abx.py delete mode 100644 packages/archivebox-pocket/.circleci/config.yml delete mode 100644 packages/archivebox-pocket/.gitignore delete mode 100644 packages/archivebox-pocket/LICENSE.md delete mode 100644 packages/archivebox-pocket/MANIFEST.in delete mode 100644 packages/archivebox-pocket/README.md delete mode 100644 packages/archivebox-pocket/pocket.py delete mode 100644 packages/archivebox-pocket/pyproject.toml delete mode 100644 packages/archivebox-pocket/requirements.txt delete mode 100644 packages/archivebox-pocket/setup.py delete mode 100644 packages/archivebox-pocket/test_pocket.py diff --git a/archivebox/__init__.py b/archivebox/__init__.py index bb2a9806..69df1876 100755 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -15,7 +15,7 @@ import os import sys from pathlib import Path - +from typing import cast ASCII_LOGO = """ █████╗ ██████╗ ██████╗██╗ ██╗██╗██╗ ██╗███████╗ ██████╗ ██████╗ ██╗ ██╗ ██╔══██╗██╔══██╗██╔════╝██║ ██║██║██║ ██║██╔════╝ ██╔══██╗██╔═══██╗╚██╗██╔╝ @@ -52,6 +52,50 @@ load_vendored_libs() # print('DONE LOADING VENDORED LIBRARIES') +import abx # noqa +import abx_spec_archivebox # noqa +import abx_spec_config # noqa +import abx_spec_pydantic_pkgr # noqa +import abx_spec_django # noqa +import abx_spec_searchbackend # noqa + + +abx.pm.add_hookspecs(abx_spec_config.PLUGIN_SPEC) +abx.pm.register(abx_spec_config.PLUGIN_SPEC()) + +abx.pm.add_hookspecs(abx_spec_pydantic_pkgr.PLUGIN_SPEC) +abx.pm.register(abx_spec_pydantic_pkgr.PLUGIN_SPEC()) + +abx.pm.add_hookspecs(abx_spec_django.PLUGIN_SPEC) +abx.pm.register(abx_spec_django.PLUGIN_SPEC()) +
+abx.pm.add_hookspecs(abx_spec_searchbackend.PLUGIN_SPEC) +abx.pm.register(abx_spec_searchbackend.PLUGIN_SPEC()) + + +abx.pm = cast(abx.ABXPluginManager[abx_spec_archivebox.ArchiveBoxPluginSpec], abx.pm) +pm = abx.pm + + +# Load all installed ABX-compatible plugins +ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx') +# Load all ArchiveBox-specific plugins +ARCHIVEBOX_BUILTIN_PLUGINS = { + 'config': PACKAGE_DIR / 'config', + 'core': PACKAGE_DIR / 'core', + # 'search': PACKAGE_DIR / 'search', + # 'core': PACKAGE_DIR / 'core', +} +# Load all user-defined ArchiveBox plugins +USER_PLUGINS = abx.find_plugins_in_dir(Path(os.getcwd()) / 'user_plugins') +# Merge all plugins together +ALL_PLUGINS = {**ABX_ECOSYSTEM_PLUGINS, **ARCHIVEBOX_BUILTIN_PLUGINS, **USER_PLUGINS} + + +# Load ArchiveBox plugins +LOADED_PLUGINS = abx.load_plugins(ALL_PLUGINS) + + from .config.constants import CONSTANTS # noqa from .config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa from .config.version import VERSION # noqa diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index a3e96681..55a76384 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -1,4 +1,5 @@ -__package__ = 'archivebox.config' +__package__ = 'config' +__order__ = 200 from .paths import ( PACKAGE_DIR, # noqa @@ -9,30 +10,3 @@ from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHI from .version import VERSION # noqa -import abx - - -# @abx.hookimpl -# def get_INSTALLED_APPS(): -# return ['config'] - - -@abx.hookimpl -def get_CONFIG(): - from .common import ( - SHELL_CONFIG, - STORAGE_CONFIG, - GENERAL_CONFIG, - SERVER_CONFIG, - ARCHIVING_CONFIG, - SEARCH_BACKEND_CONFIG, - ) - return { - 'SHELL_CONFIG': SHELL_CONFIG, - 'STORAGE_CONFIG': STORAGE_CONFIG, - 'GENERAL_CONFIG': GENERAL_CONFIG, - 'SERVER_CONFIG': SERVER_CONFIG, - 'ARCHIVING_CONFIG': ARCHIVING_CONFIG, - 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG, - } - diff --git a/archivebox/config/configfile.py b/archivebox/config/collection.py similarity index 94% rename from archivebox/config/configfile.py rename to archivebox/config/collection.py index 911e1559..d0c5a273 100644 --- a/archivebox/config/configfile.py +++ b/archivebox/config/collection.py @@ -9,6 +9,8 @@ from configparser import ConfigParser from benedict import benedict +import archivebox + from archivebox.config.constants import CONSTANTS from archivebox.misc.logging import stderr @@ -16,9 +18,9 @@ from archivebox.misc.logging import stderr def get_real_name(key: str) -> str: """get the up-to-date canonical name for a given old alias or current key""" - from django.conf import settings + CONFIGS = archivebox.pm.hook.get_CONFIGS() - for section in settings.CONFIGS.values(): + for section in CONFIGS.values(): try: return section.aliases[key] except KeyError: @@ -115,17 +117,15 @@ def load_config_file() -> Optional[benedict]: def section_for_key(key: str) -> Any: - from django.conf import settings - for config_section in settings.CONFIGS.values(): + for config_section in archivebox.pm.hook.get_CONFIGS().values(): if hasattr(config_section, key): return config_section - return None + raise ValueError(f'No config section found for key: {key}') def write_config_file(config: Dict[str, str]) -> benedict: """load the ini-formatted config file from DATA_DIR/Archivebox.conf""" - import abx.archivebox.reads from archivebox.misc.system import atomic_write CONFIG_HEADER = ( @@ -175,7 +175,7 @@ def write_config_file(config: Dict[str, str]) -> benedict: 
updated_config = {} try: # validate the updated_config by attempting to re-parse it - updated_config = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()} + updated_config = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()} except BaseException: # lgtm [py/catch-base-exception] # something went horribly wrong, revert to the previous version with open(f'{config_path}.bak', 'r', encoding='utf-8') as old: @@ -233,11 +233,11 @@ def load_config(defaults: Dict[str, Any], return benedict(extended_config) def load_all_config(): - import abx.archivebox.reads + import abx flat_config = benedict() - for config_section in abx.archivebox.reads.get_CONFIGS().values(): + for config_section in abx.pm.hook.get_CONFIGS().values(): config_section.__init__() flat_config.update(config_section.model_dump()) diff --git a/archivebox/config/common.py b/archivebox/config/common.py index 2deccb0d..238fcfac 100644 --- a/archivebox/config/common.py +++ b/archivebox/config/common.py @@ -7,10 +7,10 @@ from typing import Dict, Optional, List from pathlib import Path from rich import print -from pydantic import Field, field_validator, computed_field +from pydantic import Field, field_validator from django.utils.crypto import get_random_string -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from .constants import CONSTANTS from .version import get_COMMIT_HASH, get_BUILD_TIME @@ -31,22 +31,19 @@ class ShellConfig(BaseConfigSet): ANSI: Dict[str, str] = Field(default=lambda c: CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS) - VERSIONS_AVAILABLE: bool = False # .check_for_update.get_versions_available_on_github(c)}, - CAN_UPGRADE: bool = False # .check_for_update.can_upgrade(c)}, + # VERSIONS_AVAILABLE: bool = False # .check_for_update.get_versions_available_on_github(c)}, + # CAN_UPGRADE: bool = False # .check_for_update.can_upgrade(c)}, - @computed_field @property def TERM_WIDTH(self) -> int: if not self.IS_TTY: return 200 return shutil.get_terminal_size((140, 10)).columns - @computed_field @property def COMMIT_HASH(self) -> Optional[str]: return get_COMMIT_HASH() - @computed_field @property def BUILD_TIME(self) -> str: return get_BUILD_TIME() diff --git a/archivebox/config/django.py b/archivebox/config/django.py index eb79ab43..ad3d17c1 100644 --- a/archivebox/config/django.py +++ b/archivebox/config/django.py @@ -97,7 +97,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None: except Exception as e: bump_startup_progress_bar(advance=1000) - is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version', 'init')) + is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version')) if not is_using_meta_cmd: # show error message to user only if they're not running a meta command / just trying to get help STDERR.print() diff --git a/archivebox/config/views.py b/archivebox/config/views.py index db2c7eaa..975ef7ff 100644 --- a/archivebox/config/views.py +++ b/archivebox/config/views.py @@ -14,8 +14,8 @@ from django.utils.html import format_html, mark_safe from admin_data_views.typing import TableContext, ItemContext from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink -import abx.archivebox.reads - +import abx +import archivebox from archivebox.config import CONSTANTS from archivebox.misc.util import parse_date @@ -65,7 +65,7 @@ def 
obj_to_yaml(obj: Any, indent: int=0) -> str: @render_with_table_view def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: - + FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' rows = { @@ -81,12 +81,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: relevant_configs = { key: val - for key, val in settings.FLAT_CONFIG.items() + for key, val in FLAT_CONFIG.items() if '_BINARY' in key or '_VERSION' in key } - for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items(): - plugin = abx.archivebox.reads.get_PLUGIN(plugin_id) + for plugin_id, plugin in abx.get_all_plugins().items(): if not plugin.hooks.get('get_BINARIES'): continue @@ -131,17 +130,16 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: @render_with_item_view def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: - assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' + assert request.user and request.user.is_superuser, 'Must be a superuser to view configuration settings.' binary = None plugin = None - for plugin_id in abx.archivebox.reads.get_PLUGINS().keys(): - loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id) + for plugin_id, plugin in abx.get_all_plugins().items(): try: - for loaded_binary in loaded_plugin.hooks.get_BINARIES().values(): + for loaded_binary in plugin['hooks'].get_BINARIES().values(): if loaded_binary.name == key: binary = loaded_binary - plugin = loaded_plugin + plugin = plugin # break # last write wins except Exception as e: print(e) @@ -161,7 +159,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: "name": binary.name, "description": binary.abspath, "fields": { - 'plugin': plugin.package, + 'plugin': plugin['package'], 'binprovider': binary.loaded_binprovider, 'abspath': binary.loaded_abspath, 'version': binary.loaded_version, @@ -215,9 +213,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext: return color return 'black' - for plugin_id in settings.PLUGINS.keys(): - - plugin = abx.archivebox.reads.get_PLUGIN(plugin_id) + for plugin_id, plugin in abx.get_all_plugins().items(): plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {}) plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {}) plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {}) @@ -263,7 +259,7 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: assert plugin_id, f'Could not find a plugin matching the specified name: {key}' - plugin = abx.archivebox.reads.get_PLUGIN(plugin_id) + plugin = abx.get_plugin(plugin_id) return ItemContext( slug=key, diff --git a/archivebox/core/__init__.py b/archivebox/core/__init__.py index ac3ec769..9a301977 100644 --- a/archivebox/core/__init__.py +++ b/archivebox/core/__init__.py @@ -1,2 +1,31 @@ __package__ = 'archivebox.core' +import abx + +@abx.hookimpl +def register_admin(admin_site): + """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) 
with the admin site""" + from core.admin import register_admin + register_admin(admin_site) + + + +@abx.hookimpl +def get_CONFIG(): + from archivebox.config.common import ( + SHELL_CONFIG, + STORAGE_CONFIG, + GENERAL_CONFIG, + SERVER_CONFIG, + ARCHIVING_CONFIG, + SEARCH_BACKEND_CONFIG, + ) + return { + 'SHELL_CONFIG': SHELL_CONFIG, + 'STORAGE_CONFIG': STORAGE_CONFIG, + 'GENERAL_CONFIG': GENERAL_CONFIG, + 'SERVER_CONFIG': SERVER_CONFIG, + 'ARCHIVING_CONFIG': ARCHIVING_CONFIG, + 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG, + } + diff --git a/archivebox/core/admin_site.py b/archivebox/core/admin_site.py index de92db8c..7aea2cf5 100644 --- a/archivebox/core/admin_site.py +++ b/archivebox/core/admin_site.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.core' from django.contrib import admin -import abx.django.use +import archivebox class ArchiveBoxAdmin(admin.AdminSite): site_header = 'ArchiveBox' @@ -37,6 +37,6 @@ def register_admin_site(): sites.site = archivebox_admin # register all plugins admin classes - abx.django.use.register_admin(archivebox_admin) + archivebox.pm.hook.register_admin(admin_site=archivebox_admin) return archivebox_admin diff --git a/archivebox/core/apps.py b/archivebox/core/apps.py index 870a77f8..b516678f 100644 --- a/archivebox/core/apps.py +++ b/archivebox/core/apps.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.core' from django.apps import AppConfig -import abx +import archivebox class CoreConfig(AppConfig): @@ -10,16 +10,11 @@ class CoreConfig(AppConfig): def ready(self): """Register the archivebox.core.admin_site as the main django admin site""" + from django.conf import settings + archivebox.pm.hook.ready(settings=settings) + from core.admin_site import register_admin_site register_admin_site() - abx.pm.hook.ready() - - -@abx.hookimpl -def register_admin(admin_site): - """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) 
with the admin site""" - from core.admin import register_admin - register_admin(admin_site) diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 88858156..06cfa8b2 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -9,10 +9,12 @@ from pathlib import Path from django.utils.crypto import get_random_string import abx +import archivebox -from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS +from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa + IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3] IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] @@ -22,24 +24,8 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v ### ArchiveBox Plugin Settings ################################################################################ -PLUGIN_HOOKSPECS = [ - 'abx_spec_django', - 'abx_spec_pydantic_pkgr', - 'abx_spec_config', - 'abx_spec_archivebox', -] -abx.register_hookspecs(PLUGIN_HOOKSPECS) - -SYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx') -USER_PLUGINS = abx.find_plugins_in_dir(DATA_DIR / 'user_plugins') - -ALL_PLUGINS = {**SYSTEM_PLUGINS, **USER_PLUGINS} - -# Load ArchiveBox plugins -abx.load_plugins(ALL_PLUGINS) - -# # Load ArchiveBox config from plugins - +ALL_PLUGINS = archivebox.ALL_PLUGINS +LOADED_PLUGINS = archivebox.LOADED_PLUGINS ################################################################################ ### Django Core Settings @@ -101,6 +87,7 @@ INSTALLED_APPS = [ + MIDDLEWARE = [ 'core.middleware.TimezoneMiddleware', 'django.middleware.security.SecurityMiddleware', diff --git a/archivebox/core/settings_logging.py b/archivebox/core/settings_logging.py index d9fc28bd..d292e15a 100644 --- a/archivebox/core/settings_logging.py +++ b/archivebox/core/settings_logging.py @@ -163,11 +163,6 @@ SETTINGS_LOGGING = { "level": "DEBUG", "propagate": False, }, - "plugins_extractor": { - "handlers": ["default", "logfile"], - "level": "DEBUG", - "propagate": False, - }, "httpx": { "handlers": ["outbound_webhooks"], "level": "INFO", diff --git a/archivebox/core/views.py b/archivebox/core/views.py index d423c146..bb1c234f 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -21,6 +21,7 @@ from django.utils.decorators import method_decorator from admin_data_views.typing import TableContext, ItemContext from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink +import archivebox from core.models import Snapshot from core.forms import AddLinkForm @@ -32,9 +33,8 @@ from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG from archivebox.misc.util import base_url, htmlencode, ts_to_date_str from archivebox.misc.serve_static import serve_static_with_byterange_support -from ..plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG -from ..logging_util import printable_filesize -from ..search import query_search_index +from archivebox.logging_util import printable_filesize +from archivebox.search import query_search_index class HomepageView(View): @@ -154,7 +154,7 @@ class SnapshotView(View): 'status_color': 'success' if link.is_archived else 'danger', 'oldest_archive_date': ts_to_date_str(link.oldest_archive_date), 'warc_path': warc_path, - 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG, + 
'SAVE_ARCHIVE_DOT_ORG': archivebox.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG, 'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS, 'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']), 'best_result': best_result, @@ -500,21 +500,25 @@ class HealthCheckView(View): def find_config_section(key: str) -> str: + CONFIGS = archivebox.pm.hook.get_CONFIGS() + if key in CONSTANTS_CONFIG: return 'CONSTANT' matching_sections = [ - section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields + section_id for section_id, section in CONFIGS.items() if key in section.model_fields ] section = matching_sections[0] if matching_sections else 'DYNAMIC' return section def find_config_default(key: str) -> str: + CONFIGS = archivebox.pm.hook.get_CONFIGS() + if key in CONSTANTS_CONFIG: return str(CONSTANTS_CONFIG[key]) default_val = None - for config in settings.CONFIGS.values(): + for config in CONFIGS.values(): if key in config.model_fields: default_val = config.model_fields[key].default break @@ -530,7 +534,9 @@ def find_config_default(key: str) -> str: return default_val def find_config_type(key: str) -> str: - for config in settings.CONFIGS.values(): + CONFIGS = archivebox.pm.hook.get_CONFIGS() + + for config in CONFIGS.values(): if hasattr(config, key): type_hints = get_type_hints(config) try: @@ -547,7 +553,8 @@ def key_is_safe(key: str) -> bool: @render_with_table_view def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: - + CONFIGS = archivebox.pm.hook.get_CONFIGS() + assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' rows = { @@ -560,7 +567,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: # "Aliases": [], } - for section_id, section in reversed(list(settings.CONFIGS.items())): + for section_id, section in reversed(list(CONFIGS.items())): for key, field in section.model_fields.items(): rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '') rows['Key'].append(ItemLink(key, key=key)) @@ -570,7 +577,6 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: # rows['Documentation'].append(mark_safe(f'Wiki: {key}')) # rows['Aliases'].append(', '.join(find_config_aliases(key))) - section = 'CONSTANT' for key in CONSTANTS_CONFIG.keys(): rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '') @@ -589,7 +595,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: @render_with_item_view def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: - + CONFIGS = archivebox.pm.hook.get_CONFIGS() + FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() + assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' 
     # aliases = USER_CONFIG.get(key, {}).get("aliases", [])
 
@@ -597,7 +605,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
     if key in CONSTANTS_CONFIG:
         section_header = mark_safe(f'[CONSTANTS]   {key}   (read-only, hardcoded by ArchiveBox)')
-    elif key in settings.FLAT_CONFIG:
+    elif key in FLAT_CONFIG:
         section_header = mark_safe(f'data / ArchiveBox.conf   [{find_config_section(key)}]   {key}')
     else:
         section_header = mark_safe(f'[DYNAMIC CONFIG]   {key}   (read-only, calculated at runtime)')
@@ -613,7 +621,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
         "fields": {
             'Key': key,
             'Type': find_config_type(key),
-            'Value': settings.FLAT_CONFIG.get(key, settings.CONFIGS.get(key, None)) if key_is_safe(key) else '********',
+            'Value': FLAT_CONFIG.get(key, CONFIGS.get(key, None)) if key_is_safe(key) else '********',
         },
         "help_texts": {
             'Key': mark_safe(f'''
@@ -635,13 +643,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
                    {find_config_default(key) or '↗️ See in ArchiveBox source code...'}

-
+
                    To change this value, edit data/ArchiveBox.conf or run:
                    archivebox config --set {key}="{
                        val.strip("'") if (val := find_config_default(key)) else
-                        (repr(settings.FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
+                        (repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
                    }"
'''), diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 07ebb415..42f9d6c7 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -27,43 +27,29 @@ from ..logging_util import ( log_archive_method_finished, ) -from .title import should_save_title, save_title -from .favicon import should_save_favicon, save_favicon -from .wget import should_save_wget, save_wget -from .singlefile import should_save_singlefile, save_singlefile -from .readability import should_save_readability, save_readability -from .mercury import should_save_mercury, save_mercury -from .htmltotext import should_save_htmltotext, save_htmltotext -from .pdf import should_save_pdf, save_pdf -from .screenshot import should_save_screenshot, save_screenshot -from .dom import should_save_dom, save_dom -from .git import should_save_git, save_git -from .media import should_save_media, save_media -from .archive_org import should_save_archive_dot_org, save_archive_dot_org -from .headers import should_save_headers, save_headers - ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool] SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult] ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction] def get_default_archive_methods() -> List[ArchiveMethodEntry]: + # TODO: move to abx.pm.hook.get_EXTRACTORS() return [ - ('favicon', should_save_favicon, save_favicon), - ('headers', should_save_headers, save_headers), - ('singlefile', should_save_singlefile, save_singlefile), - ('pdf', should_save_pdf, save_pdf), - ('screenshot', should_save_screenshot, save_screenshot), - ('dom', should_save_dom, save_dom), - ('wget', should_save_wget, save_wget), - # keep title, readability, and htmltotext below wget and singlefile, as they depend on them - ('title', should_save_title, save_title), - ('readability', should_save_readability, save_readability), - ('mercury', should_save_mercury, save_mercury), - ('htmltotext', should_save_htmltotext, save_htmltotext), - ('git', should_save_git, save_git), - ('media', should_save_media, save_media), - ('archive_org', should_save_archive_dot_org, save_archive_dot_org), + # ('favicon', should_save_favicon, save_favicon), + # ('headers', should_save_headers, save_headers), + # ('singlefile', should_save_singlefile, save_singlefile), + # ('pdf', should_save_pdf, save_pdf), + # ('screenshot', should_save_screenshot, save_screenshot), + # ('dom', should_save_dom, save_dom), + # ('wget', should_save_wget, save_wget), + # # keep title, readability, and htmltotext below wget and singlefile, as they depend on them + # ('title', should_save_title, save_title), + # ('readability', should_save_readability, save_readability), + # ('mercury', should_save_mercury, save_mercury), + # ('htmltotext', should_save_htmltotext, save_htmltotext), + # ('git', should_save_git, save_git), + # ('media', should_save_media, save_media), + # ('archive_org', should_save_archive_dot_org, save_archive_dot_org), ] ARCHIVE_METHODS_INDEXING_PRECEDENCE = [ diff --git a/archivebox/index/html.py b/archivebox/index/html.py index eae93e67..24cad5c0 100644 --- a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -8,6 +8,8 @@ from typing import List, Optional, Iterator, Mapping from django.utils.html import format_html, mark_safe # type: ignore from django.core.cache import cache +import abx + from archivebox.misc.system import atomic_write from archivebox.misc.util import ( enforce_types, @@ -19,7 +21,6 @@ from archivebox.misc.util 
import ( from archivebox.config import CONSTANTS, DATA_DIR, VERSION from archivebox.config.common import SERVER_CONFIG from archivebox.config.version import get_COMMIT_HASH -from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG from .schema import Link from ..logging_util import printable_filesize @@ -79,8 +80,10 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None: @enforce_types def link_details_template(link: Link) -> str: - - from ..extractors.wget import wget_output_path + + from abx_plugin_wget_extractor.wget import wget_output_path + + SAVE_ARCHIVE_DOT_ORG = abx.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG link_info = link._asdict(extended=True) @@ -102,7 +105,7 @@ def link_details_template(link: Link) -> str: 'status': 'archived' if link.is_archived else 'not yet archived', 'status_color': 'success' if link.is_archived else 'danger', 'oldest_archive_date': ts_to_date_str(link.oldest_archive_date), - 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG, + 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG, 'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS, }) diff --git a/archivebox/index/json.py b/archivebox/index/json.py index eaa93c2e..0a484c75 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -8,7 +8,7 @@ from pathlib import Path from datetime import datetime, timezone from typing import List, Optional, Iterator, Any, Union -import abx.archivebox.reads +import abx from archivebox.config import VERSION, DATA_DIR, CONSTANTS from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG @@ -33,7 +33,7 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool): 'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki', 'source': 'https://github.com/ArchiveBox/ArchiveBox', 'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues', - 'dependencies': dict(abx.archivebox.reads.get_BINARIES()), + 'dependencies': dict(abx.pm.hook.get_BINARIES()), }, } diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index a3c0e967..78e80ef9 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -17,9 +17,9 @@ from dataclasses import dataclass, asdict, field, fields from django.utils.functional import cached_property -from archivebox.config import ARCHIVE_DIR, CONSTANTS +import abx -from plugins_extractor.favicon.config import FAVICON_CONFIG +from archivebox.config import ARCHIVE_DIR, CONSTANTS from archivebox.misc.system import get_dir_size from archivebox.misc.util import ts_to_date_str, parse_date @@ -426,7 +426,10 @@ class Link: def canonical_outputs(self) -> Dict[str, Optional[str]]: """predict the expected output paths that should be present after archiving""" - from ..extractors.wget import wget_output_path + from abx_plugin_wget.wget import wget_output_path + + FAVICON_CONFIG = abx.pm.hook.get_CONFIGS().favicon + # TODO: banish this awful duplication from the codebase and import these # from their respective extractor files canonical = { diff --git a/archivebox/machine/models.py b/archivebox/machine/models.py index 229e1d83..7686b73e 100644 --- a/archivebox/machine/models.py +++ b/archivebox/machine/models.py @@ -8,9 +8,10 @@ from django.db import models from django.utils import timezone from django.utils.functional import cached_property -import abx.archivebox.reads +import abx +import archivebox -from abx.archivebox.base_binary import BaseBinary, BaseBinProvider +from pydantic_pkgr import Binary, BinProvider from archivebox.abid_utils.models import 
ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats @@ -180,7 +181,7 @@ class NetworkInterface(ABIDModel, ModelWithHealthStats): class InstalledBinaryManager(models.Manager): - def get_from_db_or_cache(self, binary: BaseBinary) -> 'InstalledBinary': + def get_from_db_or_cache(self, binary: Binary) -> 'InstalledBinary': """Get or create an InstalledBinary record for a Binary on the local machine""" global _CURRENT_BINARIES @@ -216,7 +217,7 @@ class InstalledBinaryManager(models.Manager): # if binary was not yet loaded from filesystem, do it now # this is expensive, we have to find it's abspath, version, and sha256, but it's necessary # to make sure we have a good, up-to-date record of it in the DB & in-memroy cache - binary = binary.load(fresh=True) + binary = archivebox.pm.hook.binary_load(binary=binary, fresh=True) assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256' @@ -291,8 +292,8 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): if not hasattr(self, 'machine'): self.machine = Machine.objects.current() if not self.binprovider: - all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values()) - binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True) + all_known_binproviders = list(abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values()) + binary = archivebox.pm.hook.binary_load(binary=Binary(name=self.name, binproviders=all_known_binproviders), fresh=True) self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None if not self.abspath: self.abspath = self.BINPROVIDER.get_abspath(self.name) @@ -304,16 +305,16 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): super().clean(*args, **kwargs) @cached_property - def BINARY(self) -> BaseBinary: - for binary in abx.archivebox.reads.get_BINARIES().values(): + def BINARY(self) -> Binary: + for binary in abx.as_dict(archivebox.pm.hook.get_BINARIES()).values(): if binary.name == self.name: return binary raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it') # TODO: we could technically reconstruct it from scratch, but why would we ever want to do that? @cached_property - def BINPROVIDER(self) -> BaseBinProvider: - for binprovider in abx.archivebox.reads.get_BINPROVIDERS().values(): + def BINPROVIDER(self) -> BinProvider: + for binprovider in abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values(): if binprovider.name == self.binprovider: return binprovider raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})') @@ -321,7 +322,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): # maybe not a good idea to provide this? Binary in DB is a record of the binary's config # whereas a loaded binary is a not-yet saved instance that may not have the same config # why would we want to load a binary record from the db when it could be freshly loaded? 
- def load_from_db(self) -> BaseBinary: + def load_from_db(self) -> Binary: # TODO: implement defaults arg in pydantic_pkgr # return self.BINARY.load(defaults={ # 'binprovider': self.BINPROVIDER, @@ -330,7 +331,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): # 'sha256': self.sha256, # }) - return BaseBinary.model_validate({ + return Binary.model_validate({ **self.BINARY.model_dump(), 'abspath': self.abspath and Path(self.abspath), 'version': self.version, @@ -340,5 +341,5 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): 'overrides': self.BINARY.overrides, }) - def load_fresh(self) -> BaseBinary: - return self.BINARY.load(fresh=True) + def load_fresh(self) -> Binary: + return archivebox.pm.hook.binary_load(binary=self.BINARY, fresh=True) diff --git a/archivebox/main.py b/archivebox/main.py index fab99dc9..ce6347b2 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -14,6 +14,10 @@ from crontab import CronTab, CronSlices from django.db.models import QuerySet from django.utils import timezone +from pydantic_pkgr import Binary + +import abx +import archivebox from archivebox.misc.checks import check_data_folder from archivebox.misc.util import enforce_types # type: ignore from archivebox.misc.system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT @@ -197,13 +201,13 @@ def version(quiet: bool=False, from django.conf import settings - from abx.archivebox.base_binary import BaseBinary, apt, brew, env + from abx_plugin_default_binproviders import apt, brew, env from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID from archivebox.config.paths import get_data_locations, get_code_locations - from plugins_auth.ldap.config import LDAP_CONFIG + LDAP_ENABLED = archivebox.pm.hook.get_FLAT_CONFIG().LDAP_ENABLED # 0.7.1 @@ -242,7 +246,7 @@ def version(quiet: bool=False, f'SUDO={CONSTANTS.IS_ROOT}', f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}', f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}', - f'LDAP={LDAP_CONFIG.LDAP_ENABLED}', + f'LDAP={LDAP_ENABLED}', #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually ) prnt() @@ -264,7 +268,8 @@ def version(quiet: bool=False, prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]') failures = [] - for name, binary in list(settings.BINARIES.items()): + BINARIES = abx.as_dict(archivebox.pm.hook.get_BINARIES()) + for name, binary in list(BINARIES.items()): if binary.name == 'archivebox': continue @@ -295,14 +300,15 @@ def version(quiet: bool=False, prnt() prnt('[gold3][i] Package Managers:[/gold3]') - for name, binprovider in list(settings.BINPROVIDERS.items()): + BINPROVIDERS = abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()) + for name, binprovider in list(BINPROVIDERS.items()): err = None if binproviders and binprovider.name not in binproviders: continue # TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN - loaded_bin = binprovider.INSTALLER_BINARY or BaseBinary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew]) + loaded_bin = binprovider.INSTALLER_BINARY or Binary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew]) abspath = None if loaded_bin.abspath: @@ -1050,10 +1056,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina # - recommend user re-run with sudo if any deps need to be installed as 
root from rich import print - from django.conf import settings - - import abx.archivebox.reads from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP from archivebox.config.paths import get_or_create_working_lib_dir @@ -1076,11 +1079,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina package_manager_names = ', '.join( f'[yellow]{binprovider.name}[/yellow]' - for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())) + for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())) if not binproviders or (binproviders and binprovider.name in binproviders) ) print(f'[+] Setting up package managers {package_manager_names}...') - for binprovider in reversed(list(abx.archivebox.reads.get_BINPROVIDERS().values())): + for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())): if binproviders and binprovider.name not in binproviders: continue try: @@ -1093,7 +1096,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina print() - for binary in reversed(list(abx.archivebox.reads.get_BINARIES().values())): + for binary in reversed(list(abx.as_dict(abx.pm.hook.get_BINARIES()).values())): if binary.name in ('archivebox', 'django', 'sqlite', 'python'): # obviously must already be installed if we are running continue @@ -1123,7 +1126,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) sys.stderr.write("\033[00m\n") # reset else: - result = binary.load_or_install(binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) + loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False) + result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) if result and result['loaded_version']: break except Exception as e: @@ -1134,7 +1138,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) sys.stderr.write("\033[00m\n") # reset else: - binary.load_or_install(fresh=True, dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) + loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, fresh=True, dry_run=dry_run) + result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) if IS_ROOT and LIB_DIR: with SudoPermission(uid=0): if ARCHIVEBOX_USER == 0: @@ -1158,7 +1163,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr) - from plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY + from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY extra_args = [] if binproviders: @@ -1184,8 +1189,6 @@ def config(config_options_str: Optional[str]=None, out_dir: Path=DATA_DIR) -> None: """Get and set your ArchiveBox project configuration values""" - import abx.archivebox.reads - from rich import print check_data_folder() @@ -1199,7 +1202,8 @@ def config(config_options_str: Optional[str]=None, elif config_options_str: config_options = config_options_str.split('\n') - from django.conf import settings + FLAT_CONFIG = 
archivebox.pm.hook.get_FLAT_CONFIG() + CONFIGS = archivebox.pm.hook.get_CONFIGS() config_options = config_options or [] @@ -1209,8 +1213,8 @@ def config(config_options_str: Optional[str]=None, if search: if config_options: config_options = [get_real_name(key) for key in config_options] - matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG} - for config_section in settings.CONFIGS.values(): + matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG} + for config_section in CONFIGS.values(): aliases = config_section.aliases for search_key in config_options: @@ -1229,15 +1233,15 @@ def config(config_options_str: Optional[str]=None, elif get or no_args: if config_options: config_options = [get_real_name(key) for key in config_options] - matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG} - failed_config = [key for key in config_options if key not in settings.FLAT_CONFIG] + matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG} + failed_config = [key for key in config_options if key not in FLAT_CONFIG] if failed_config: stderr() stderr('[X] These options failed to get', color='red') stderr(' {}'.format('\n '.join(config_options))) raise SystemExit(1) else: - matching_config = settings.FLAT_CONFIG + matching_config = FLAT_CONFIG print(printable_config(matching_config)) raise SystemExit(not matching_config) @@ -1258,20 +1262,20 @@ def config(config_options_str: Optional[str]=None, if key != raw_key: stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow') - if key in settings.FLAT_CONFIG: + if key in FLAT_CONFIG: new_config[key] = val.strip() else: failed_options.append(line) if new_config: - before = settings.FLAT_CONFIG + before = FLAT_CONFIG matching_config = write_config_file(new_config) - after = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()} + after = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()} print(printable_config(matching_config)) side_effect_changes = {} for key, val in after.items(): - if key in settings.FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config): + if key in FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config): side_effect_changes[key] = after[key] # import ipdb; ipdb.set_trace() @@ -1313,7 +1317,7 @@ def schedule(add: bool=False, """Set ArchiveBox to regularly import URLs at specific times using cron""" check_data_folder() - from archivebox.plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY + from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY from archivebox.config.permissions import USER Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True) diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py index b0322a1e..8a2894fe 100644 --- a/archivebox/misc/checks.py +++ b/archivebox/misc/checks.py @@ -201,6 +201,7 @@ def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True): def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True): + import archivebox from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP from archivebox.misc.logging import STDERR from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir @@ -209,6 +210,8 @@ def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_ex lib_dir = lib_dir or 
STORAGE_CONFIG.LIB_DIR + assert lib_dir == archivebox.pm.hook.get_LIB_DIR(), "lib_dir is not the same as the one in the flat config" + if not must_exist and not os.path.isdir(lib_dir): return True diff --git a/archivebox/misc/shell_welcome_message.py b/archivebox/misc/shell_welcome_message.py index 5b85e6bd..26314dc0 100644 --- a/archivebox/misc/shell_welcome_message.py +++ b/archivebox/misc/shell_welcome_message.py @@ -23,7 +23,7 @@ from archivebox import CONSTANTS # noqa from ..main import * # noqa from ..cli import CLI_SUBCOMMANDS -CONFIG = settings.FLAT_CONFIG +CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys()) if __name__ == '__main__': @@ -55,6 +55,5 @@ if __name__ == '__main__': prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? after anything to get help[/]') prnt(' add("https://example.com/some/new/url") [grey53]# call CLI methods from the shell[/]') prnt(' snap = Snapshot.objects.filter(url__contains="https://example.com").last() [grey53]# query for individual snapshots[/]') - prnt(' archivebox.plugins_extractor.wget.apps.WGET_EXTRACTOR.extract(snap.id) [grey53]# call an extractor directly[/]') prnt(' snap.archiveresult_set.all() [grey53]# see extractor results[/]') prnt(' bool(re.compile(CONFIG.URL_DENYLIST).search("https://example.com/abc.exe")) [grey53]# test out a config change[/]') diff --git a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py index 9b88d958..52dbba17 100644 --- a/archivebox/parsers/pocket_api.py +++ b/archivebox/parsers/pocket_api.py @@ -6,8 +6,7 @@ import re from typing import IO, Iterable, Optional from configparser import ConfigParser -from pocket import Pocket - +import archivebox from archivebox.config import CONSTANTS from archivebox.misc.util import enforce_types from archivebox.misc.system import atomic_write @@ -22,7 +21,7 @@ API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db' _BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))') -def get_pocket_articles(api: Pocket, since=None, page=0): +def get_pocket_articles(api, since=None, page=0): body, headers = api.get( state='archive', sort='oldest', @@ -94,7 +93,9 @@ def should_parse_as_pocket_api(text: str) -> bool: def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]: """Parse bookmarks from the Pocket API""" - from archivebox.plugins_extractor.pocket.config import POCKET_CONFIG + from pocket import Pocket + + FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() input_buffer.seek(0) pattern = re.compile(r"^pocket:\/\/(\w+)") @@ -102,7 +103,7 @@ def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]: if should_parse_as_pocket_api(line): username = pattern.search(line).group(1) - api = Pocket(POCKET_CONFIG.POCKET_CONSUMER_KEY, POCKET_CONFIG.POCKET_ACCESS_TOKENS[username]) + api = Pocket(FLAT_CONFIG.POCKET_CONSUMER_KEY, FLAT_CONFIG.POCKET_ACCESS_TOKENS[username]) api.last_since = None for article in get_pocket_articles(api, since=read_since(username)): diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py index ad464537..20a792f3 100644 --- a/archivebox/parsers/readwise_reader_api.py +++ b/archivebox/parsers/readwise_reader_api.py @@ -8,9 +8,10 @@ from datetime import datetime from typing import IO, Iterable, Optional from configparser import ConfigParser +import abx + from archivebox.misc.util import enforce_types from archivebox.misc.system import atomic_write -from archivebox.plugins_extractor.readwise.config 
import READWISE_CONFIG from ..index.schema import Link @@ -62,26 +63,30 @@ def link_from_article(article: dict, sources: list): def write_cursor(username: str, since: str): - if not READWISE_CONFIG.READWISE_DB_PATH.exists(): - atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "") + READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH + + if not READWISE_DB_PATH.exists(): + atomic_write(READWISE_DB_PATH, "") since_file = ConfigParser() since_file.optionxform = str - since_file.read(READWISE_CONFIG.READWISE_DB_PATH) + since_file.read(READWISE_DB_PATH) since_file[username] = {"since": since} - with open(READWISE_CONFIG.READWISE_DB_PATH, "w+") as new: + with open(READWISE_DB_PATH, "w+") as new: since_file.write(new) def read_cursor(username: str) -> Optional[str]: - if not READWISE_CONFIG.READWISE_DB_PATH.exists(): - atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "") + READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH + + if not READWISE_DB_PATH.exists(): + atomic_write(READWISE_DB_PATH, "") config_file = ConfigParser() config_file.optionxform = str - config_file.read(READWISE_CONFIG.READWISE_DB_PATH) + config_file.read(READWISE_DB_PATH) return config_file.get(username, "since", fallback=None) @@ -97,12 +102,14 @@ def should_parse_as_readwise_reader_api(text: str) -> bool: def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]: """Parse bookmarks from the Readwise Reader API""" + READWISE_READER_TOKENS = abx.pm.hook.get_CONFIG().READWISE_READER_TOKENS + input_buffer.seek(0) pattern = re.compile(r"^readwise-reader:\/\/(\w+)") for line in input_buffer: if should_parse_as_readwise_reader_api(line): username = pattern.search(line).group(1) - api = ReadwiseReaderAPI(READWISE_CONFIG.READWISE_READER_TOKENS[username], cursor=read_cursor(username)) + api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username)) for article in get_readwise_reader_articles(api): yield link_from_article(article, sources=[line]) diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index 2e7d4f69..921c074f 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -6,8 +6,8 @@ from typing import List, Union from django.db.models import QuerySet from django.conf import settings -import abx.archivebox.reads - +import abx +import archivebox from archivebox.index.schema import Link from archivebox.misc.util import enforce_types from archivebox.misc.logging import stderr @@ -57,7 +57,7 @@ def get_indexable_content(results: QuerySet): def import_backend(): - for backend in abx.archivebox.reads.get_SEARCHBACKENDS().values(): + for backend in abx.as_dict(archivebox.pm.hook.get_SEARCHBACKENDS()).values(): if backend.name == SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE: return backend raise Exception(f'Could not load {SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE} as search backend') diff --git a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py index fcd93405..de31354a 100644 --- a/archivebox/vendor/__init__.py +++ b/archivebox/vendor/__init__.py @@ -4,23 +4,27 @@ from pathlib import Path VENDOR_DIR = Path(__file__).parent -VENDORED_LIBS = { - # sys.path dir: library name - #'python-atomicwrites': 'atomicwrites', - #'django-taggit': 'taggit', - # 'pydantic-pkgr': 'pydantic_pkgr', - # 'pocket': 'pocket', - #'base32-crockford': 'base32_crockford', -} +VENDORED_LIBS = [ + 'abx', + 'pydantic-pkgr', + 'pocket', +] + +for subdir in reversed(sorted(VENDOR_DIR.iterdir())): + if subdir.is_dir() and subdir.name not 
in VENDORED_LIBS and not subdir.name.startswith('_'): + VENDORED_LIBS.append(subdir.name) def load_vendored_libs(): - for lib_subdir, lib_name in VENDORED_LIBS.items(): - lib_dir = VENDOR_DIR / lib_subdir - assert lib_dir.is_dir(), 'Expected vendor libary {lib_name} could not be found in {lib_dir}' + if str(VENDOR_DIR) not in sys.path: + sys.path.append(str(VENDOR_DIR)) + + for lib_name in VENDORED_LIBS: + lib_dir = VENDOR_DIR / lib_name + assert lib_dir.is_dir(), f'Expected vendor libary {lib_name} could not be found in {lib_dir}' try: lib = importlib.import_module(lib_name) - # print(f"Successfully imported lib from environment {lib_name}: {inspect.getfile(lib)}") + # print(f"Successfully imported lib from environment {lib_name}") except ImportError: sys.path.append(str(lib_dir)) try: diff --git a/packages/abx-plugin-archivedotorg-extractor/README.md b/archivebox/vendor/abx-plugin-archivedotorg/README.md similarity index 100% rename from packages/abx-plugin-archivedotorg-extractor/README.md rename to archivebox/vendor/abx-plugin-archivedotorg/README.md diff --git a/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py new file mode 100644 index 00000000..025d83bf --- /dev/null +++ b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py @@ -0,0 +1,21 @@ +__label__ = 'Archive.org' +__homepage__ = 'https://archive.org' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import ARCHIVEDOTORG_CONFIG + + return { + 'ARCHIVEDOTORG_CONFIG': ARCHIVEDOTORG_CONFIG + } + + +# @abx.hookimpl +# def get_EXTRACTORS(): +# from .extractors import ARCHIVEDOTORG_EXTRACTOR +# +# return { +# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR, +# } diff --git a/archivebox/extractors/archive_org.py b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py similarity index 100% rename from archivebox/extractors/archive_org.py rename to archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py diff --git a/packages/abx-plugin-archivedotorg-extractor/config.py b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py similarity index 54% rename from packages/abx-plugin-archivedotorg-extractor/config.py rename to archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py index bebb6c98..f4c146ab 100644 --- a/packages/abx-plugin-archivedotorg-extractor/config.py +++ b/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py @@ -1,7 +1,4 @@ -__package__ = 'plugins_extractor.archivedotorg' - - -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class ArchivedotorgConfig(BaseConfigSet): diff --git a/archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml b/archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml new file mode 100644 index 00000000..36c91f3c --- /dev/null +++ b/archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-archivedotorg" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-plugin-curl>=2024.10.24", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_archivedotorg = "abx_plugin_archivedotorg" diff --git 
a/packages/abx-plugin-chrome-extractor/README.md b/archivebox/vendor/abx-plugin-chrome/README.md similarity index 100% rename from packages/abx-plugin-chrome-extractor/README.md rename to archivebox/vendor/abx-plugin-chrome/README.md diff --git a/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py new file mode 100644 index 00000000..c300bd13 --- /dev/null +++ b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py @@ -0,0 +1,34 @@ +__label__ = 'Chrome' +__author__ = 'ArchiveBox' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import CHROME_CONFIG + + return { + 'CHROME_CONFIG': CHROME_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import CHROME_BINARY + + return { + 'chrome': CHROME_BINARY, + } + +@abx.hookimpl +def ready(): + from .config import CHROME_CONFIG + CHROME_CONFIG.validate() + + +# @abx.hookimpl +# def get_EXTRACTORS(): +# return { +# 'pdf': PDF_EXTRACTOR, +# 'screenshot': SCREENSHOT_EXTRACTOR, +# 'dom': DOM_EXTRACTOR, +# } diff --git a/packages/abx-plugin-chrome-extractor/binaries.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/binaries.py similarity index 92% rename from packages/abx-plugin-chrome-extractor/binaries.py rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/binaries.py index a79b66a2..f315c992 100644 --- a/packages/abx-plugin-chrome-extractor/binaries.py +++ b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/binaries.py @@ -1,5 +1,3 @@ -__package__ = 'plugins_extractor.chrome' - import os import platform from pathlib import Path @@ -7,17 +5,18 @@ from typing import List, Optional from pydantic import InstanceOf from pydantic_pkgr import ( + Binary, BinProvider, BinName, BinaryOverrides, bin_abspath, ) -import abx.archivebox.reads -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +import abx -from abx_puppeteer_binprovider.binproviders import PUPPETEER_BINPROVIDER -from abx_playwright_binprovider.binproviders import PLAYWRIGHT_BINPROVIDER +from abx_plugin_default_binproviders import apt, brew, env +from abx_plugin_puppeteer.binproviders import PUPPETEER_BINPROVIDER +from abx_plugin_playwright.binproviders import PLAYWRIGHT_BINPROVIDER from .config import CHROME_CONFIG @@ -81,7 +80,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path): ###################### Config ########################## -class ChromeBinary(BaseBinary): +class ChromeBinary(Binary): name: BinName = CHROME_CONFIG.CHROME_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER, apt, brew] @@ -105,7 +104,7 @@ class ChromeBinary(BaseBinary): @staticmethod def symlink_to_lib(binary, bin_dir=None) -> None: - bin_dir = bin_dir or abx.archivebox.reads.get_CONFIGS().STORAGE_CONFIG.LIB_DIR / 'bin' + bin_dir = bin_dir or abx.pm.hook.get_BIN_DIR() if not (binary.abspath and os.path.isfile(binary.abspath)): return diff --git a/packages/abx-plugin-chrome-extractor/config.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/config.py similarity index 98% rename from packages/abx-plugin-chrome-extractor/config.py rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/config.py index be62f360..fb1d9095 100644 --- a/packages/abx-plugin-chrome-extractor/config.py +++ b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/config.py @@ -1,5 +1,3 @@ -__package__ = 'plugins_extractor.chrome' - import os from pathlib import Path from typing import List, Optional 
@@ -7,8 +5,8 @@ from typing import List, Optional from pydantic import Field from pydantic_pkgr import bin_abspath -from abx.archivebox.base_configset import BaseConfigSet -from abx.archivebox.base_binary import env +from abx_spec_config.base_configset import BaseConfigSet +from abx_plugin_default_binproviders import env from archivebox.config import CONSTANTS from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG diff --git a/archivebox/extractors/dom.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/dom.py similarity index 100% rename from archivebox/extractors/dom.py rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/dom.py diff --git a/archivebox/extractors/pdf.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/pdf.py similarity index 100% rename from archivebox/extractors/pdf.py rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/pdf.py diff --git a/archivebox/extractors/screenshot.py b/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/screenshot.py similarity index 100% rename from archivebox/extractors/screenshot.py rename to archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/screenshot.py diff --git a/archivebox/vendor/abx-plugin-chrome/pyproject.toml b/archivebox/vendor/abx-plugin-chrome/pyproject.toml new file mode 100644 index 00000000..da26078d --- /dev/null +++ b/archivebox/vendor/abx-plugin-chrome/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-chrome" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_chrome = "abx_plugin_chrome" diff --git a/packages/abx-plugin-curl-extractor/README.md b/archivebox/vendor/abx-plugin-curl/README.md similarity index 100% rename from packages/abx-plugin-curl-extractor/README.md rename to archivebox/vendor/abx-plugin-curl/README.md diff --git a/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py new file mode 100644 index 00000000..7988ef5e --- /dev/null +++ b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py @@ -0,0 +1,18 @@ +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import CURL_CONFIG + + return { + 'curl': CURL_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import CURL_BINARY + + return { + 'curl': CURL_BINARY, + } diff --git a/packages/abx-plugin-curl-extractor/binaries.py b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/binaries.py similarity index 57% rename from packages/abx-plugin-curl-extractor/binaries.py rename to archivebox/vendor/abx-plugin-curl/abx_plugin_curl/binaries.py index 41ff9616..32628248 100644 --- a/packages/abx-plugin-curl-extractor/binaries.py +++ b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/binaries.py @@ -1,17 +1,17 @@ -__package__ = 'plugins_extractor.curl' +__package__ = 'abx_plugin_curl' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName +from pydantic_pkgr import BinProvider, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx_plugin_default_binproviders import apt, brew, env from .config import CURL_CONFIG -class CurlBinary(BaseBinary): +class CurlBinary(Binary): name: BinName = CURL_CONFIG.CURL_BINARY binproviders_supported: 
List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/packages/abx-plugin-curl-extractor/config.py b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/config.py similarity index 90% rename from packages/abx-plugin-curl-extractor/config.py rename to archivebox/vendor/abx-plugin-curl/abx_plugin_curl/config.py index 14996f66..69f4a637 100644 --- a/packages/abx-plugin-curl-extractor/config.py +++ b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/config.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_extractor.curl' +__package__ = 'abx_plugin_curl' from typing import List, Optional from pathlib import Path from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG diff --git a/archivebox/extractors/headers.py b/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/headers.py similarity index 100% rename from archivebox/extractors/headers.py rename to archivebox/vendor/abx-plugin-curl/abx_plugin_curl/headers.py diff --git a/archivebox/vendor/abx-plugin-curl/pyproject.toml b/archivebox/vendor/abx-plugin-curl/pyproject.toml new file mode 100644 index 00000000..f3c6ad55 --- /dev/null +++ b/archivebox/vendor/abx-plugin-curl/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-curl" +version = "2024.10.24" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_curl = "abx_plugin_curl" diff --git a/packages/abx-plugin-default-binproviders/README.md b/archivebox/vendor/abx-plugin-default-binproviders/README.md similarity index 100% rename from packages/abx-plugin-default-binproviders/README.md rename to archivebox/vendor/abx-plugin-default-binproviders/README.md diff --git a/packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py b/archivebox/vendor/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py similarity index 99% rename from packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py rename to archivebox/vendor/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py index 2a628a4e..58dbdac9 100644 --- a/packages/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py +++ b/archivebox/vendor/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py @@ -16,7 +16,6 @@ env = ENV_BINPROVIDER = EnvProvider() @abx.hookimpl(tryfirst=True) def get_BINPROVIDERS() -> Dict[str, BinProvider]: - return { 'apt': APT_BINPROVIDER, 'brew': BREW_BINPROVIDER, diff --git a/packages/abx-plugin-default-binproviders/pyproject.toml b/archivebox/vendor/abx-plugin-default-binproviders/pyproject.toml similarity index 100% rename from packages/abx-plugin-default-binproviders/pyproject.toml rename to archivebox/vendor/abx-plugin-default-binproviders/pyproject.toml diff --git a/packages/abx-plugin-favicon-extractor/README.md b/archivebox/vendor/abx-plugin-favicon/README.md similarity index 100% rename from packages/abx-plugin-favicon-extractor/README.md rename to archivebox/vendor/abx-plugin-favicon/README.md diff --git a/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py new file mode 100644 index 00000000..75004e3d --- /dev/null +++ 
b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py @@ -0,0 +1,29 @@ +__label__ = 'Favicon' +__version__ = '2024.10.24' +__author__ = 'ArchiveBox' +__homepage__ = 'https://github.com/ArchiveBox/archivebox' +__dependencies__ = [ + 'abx>=0.1.0', + 'abx-spec-config>=0.1.0', + 'abx-plugin-curl-extractor>=2024.10.24', +] + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import FAVICON_CONFIG + + return { + 'FAVICON_CONFIG': FAVICON_CONFIG + } + + +# @abx.hookimpl +# def get_EXTRACTORS(): +# from .extractors import FAVICON_EXTRACTOR + +# return { +# 'favicon': FAVICON_EXTRACTOR, +# } diff --git a/packages/abx-plugin-favicon-extractor/config.py b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/config.py similarity index 64% rename from packages/abx-plugin-favicon-extractor/config.py rename to archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/config.py index 6073ef87..8b97d758 100644 --- a/packages/abx-plugin-favicon-extractor/config.py +++ b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/config.py @@ -1,7 +1,4 @@ -__package__ = 'plugins_extractor.favicon' - - -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class FaviconConfig(BaseConfigSet): diff --git a/archivebox/extractors/favicon.py b/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/favicon.py similarity index 100% rename from archivebox/extractors/favicon.py rename to archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/favicon.py diff --git a/archivebox/vendor/abx-plugin-favicon/pyproject.toml b/archivebox/vendor/abx-plugin-favicon/pyproject.toml new file mode 100644 index 00000000..cad10890 --- /dev/null +++ b/archivebox/vendor/abx-plugin-favicon/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-favicon" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-plugin-curl>=2024.10.28", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_favicon = "abx_plugin_favicon" diff --git a/packages/abx-plugin-git-extractor/README.md b/archivebox/vendor/abx-plugin-git/README.md similarity index 100% rename from packages/abx-plugin-git-extractor/README.md rename to archivebox/vendor/abx-plugin-git/README.md diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py new file mode 100644 index 00000000..61c04b9c --- /dev/null +++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py @@ -0,0 +1,29 @@ +__package__ = 'abx_plugin_git' +__label__ = 'Git' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import GIT_CONFIG + + return { + 'GIT_CONFIG': GIT_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import GIT_BINARY + + return { + 'git': GIT_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import GIT_EXTRACTOR + + return { + 'git': GIT_EXTRACTOR, + } diff --git a/packages/abx-plugin-git-extractor/binaries.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/binaries.py similarity index 57% rename from packages/abx-plugin-git-extractor/binaries.py rename to archivebox/vendor/abx-plugin-git/abx_plugin_git/binaries.py index 8d990769..f352fd99 100644 --- a/packages/abx-plugin-git-extractor/binaries.py +++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/binaries.py @@ -1,17 
+1,17 @@ -__package__ = 'plugins_extractor.git' +__package__ = 'abx_plugin_git' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName +from pydantic_pkgr import BinProvider, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx_plugin_default_binproviders import apt, brew, env from .config import GIT_CONFIG -class GitBinary(BaseBinary): +class GitBinary(Binary): name: BinName = GIT_CONFIG.GIT_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/packages/abx-plugin-git-extractor/config.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/config.py similarity index 87% rename from packages/abx-plugin-git-extractor/config.py rename to archivebox/vendor/abx-plugin-git/abx_plugin_git/config.py index 3d890d62..d8a9ca17 100644 --- a/packages/abx-plugin-git-extractor/config.py +++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/config.py @@ -1,10 +1,10 @@ -__package__ = 'plugins_extractor.git' +__package__ = 'abx_plugin_git' from typing import List from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py new file mode 100644 index 00000000..4863d031 --- /dev/null +++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py @@ -0,0 +1,15 @@ +__package__ = 'abx_plugin_git' + +# from pathlib import Path + +# from .binaries import GIT_BINARY + + +# class GitExtractor(BaseExtractor): +# name: ExtractorName = 'git' +# binary: str = GIT_BINARY.name + +# def get_output_path(self, snapshot) -> Path | None: +# return snapshot.as_link() / 'git' + +# GIT_EXTRACTOR = GitExtractor() diff --git a/archivebox/extractors/git.py b/archivebox/vendor/abx-plugin-git/abx_plugin_git/git.py similarity index 95% rename from archivebox/extractors/git.py rename to archivebox/vendor/abx-plugin-git/abx_plugin_git/git.py index 9ac71d3e..128ba0e7 100644 --- a/archivebox/extractors/git.py +++ b/archivebox/vendor/abx-plugin-git/abx_plugin_git/git.py @@ -16,8 +16,8 @@ from archivebox.misc.util import ( from ..logging_util import TimedProgress from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from archivebox.plugins_extractor.git.config import GIT_CONFIG -from archivebox.plugins_extractor.git.binaries import GIT_BINARY +from abx_plugin_git.config import GIT_CONFIG +from abx_plugin_git.binaries import GIT_BINARY def get_output_path(): diff --git a/archivebox/vendor/abx-plugin-git/pyproject.toml b/archivebox/vendor/abx-plugin-git/pyproject.toml new file mode 100644 index 00000000..384599b7 --- /dev/null +++ b/archivebox/vendor/abx-plugin-git/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "abx-plugin-git" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-plugin-default-binproviders>=2024.10.24", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_git = "abx_plugin_git" diff --git a/packages/abx-plugin-htmltotext-extractor/README.md b/archivebox/vendor/abx-plugin-htmltotext/README.md similarity index 100% rename from 
packages/abx-plugin-htmltotext-extractor/README.md rename to archivebox/vendor/abx-plugin-htmltotext/README.md diff --git a/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py new file mode 100644 index 00000000..ebbc6800 --- /dev/null +++ b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py @@ -0,0 +1,22 @@ +__package__ = 'abx_plugin_htmltotext' +__label__ = 'HTML-to-Text' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import HTMLTOTEXT_CONFIG + + return { + 'HTMLTOTEXT_CONFIG': HTMLTOTEXT_CONFIG + } + + +# @abx.hookimpl +# def get_EXTRACTORS(): +# from .extractors import FAVICON_EXTRACTOR + +# return { +# 'htmltotext': FAVICON_EXTRACTOR, +# } diff --git a/packages/abx-plugin-htmltotext-extractor/config.py b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py similarity index 52% rename from packages/abx-plugin-htmltotext-extractor/config.py rename to archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py index 31b9bff5..bd3aabc6 100644 --- a/packages/abx-plugin-htmltotext-extractor/config.py +++ b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py @@ -1,7 +1,4 @@ -__package__ = 'plugins_extractor.htmltotext' - - -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class HtmltotextConfig(BaseConfigSet): diff --git a/archivebox/extractors/htmltotext.py b/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py similarity index 100% rename from archivebox/extractors/htmltotext.py rename to archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py diff --git a/archivebox/vendor/abx-plugin-htmltotext/pyproject.toml b/archivebox/vendor/abx-plugin-htmltotext/pyproject.toml new file mode 100644 index 00000000..46ebaa46 --- /dev/null +++ b/archivebox/vendor/abx-plugin-htmltotext/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-plugin-htmltotext" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_htmltotext = "abx_plugin_htmltotext" diff --git a/packages/abx-plugin-ldap-auth/README.md b/archivebox/vendor/abx-plugin-ldap-auth/README.md similarity index 100% rename from packages/abx-plugin-ldap-auth/README.md rename to archivebox/vendor/abx-plugin-ldap-auth/README.md diff --git a/packages/abx-plugin-ldap-auth/__init__.py b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py similarity index 68% rename from packages/abx-plugin-ldap-auth/__init__.py rename to archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py index 6ba43b90..d4ac6431 100644 --- a/packages/abx-plugin-ldap-auth/__init__.py +++ b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py @@ -1,36 +1,15 @@ -__package__ = 'plugins_auth.ldap' -__id__ = 'ldap' +__package__ = 'abx_plugin_ldap_auth' __label__ = 'LDAP' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' __homepage__ = 'https://github.com/django-auth-ldap/django-auth-ldap' -__dependencies__ = ['pip'] import abx - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': 
__author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - - - @abx.hookimpl def get_CONFIG(): from .config import LDAP_CONFIG return { - __id__: LDAP_CONFIG + 'LDAP_CONFIG': LDAP_CONFIG } @abx.hookimpl @@ -48,12 +27,12 @@ def create_superuser_from_ldap_user(sender, user=None, ldap_user=None, **kwargs) ArchiveBox requires staff/superuser status to view the admin at all, so we must create a user + set staff and superuser when LDAP authenticates a new person. """ - from django.conf import settings + from .config import LDAP_CONFIG if user is None: return # not authenticated at all - if not user.id and settings.CONFIGS.ldap.LDAP_CREATE_SUPERUSER: + if not user.id and LDAP_CONFIG.LDAP_CREATE_SUPERUSER: user.is_superuser = True # authenticated via LDAP, but user is not set up in DB yet user.is_staff = True @@ -69,9 +48,7 @@ def ready(): LDAP_CONFIG.validate() - from django.conf import settings - - if settings.CONFIGS.ldap.LDAP_ENABLED: + if LDAP_CONFIG.LDAP_ENABLED: # tell django-auth-ldap to call our function when a user is authenticated via LDAP import django_auth_ldap.backend django_auth_ldap.backend.populate_user.connect(create_superuser_from_ldap_user) diff --git a/packages/abx-plugin-ldap-auth/binaries.py b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py similarity index 78% rename from packages/abx-plugin-ldap-auth/binaries.py rename to archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py index cc932183..8ea4776d 100644 --- a/packages/abx-plugin-ldap-auth/binaries.py +++ b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py @@ -1,5 +1,4 @@ -__package__ = 'plugins_auth.ldap' - +__package__ = 'abx_plugin_ldap_auth' import inspect @@ -7,12 +6,10 @@ from typing import List from pathlib import Path from pydantic import InstanceOf -from pydantic_pkgr import BinaryOverrides, SemVer +from pydantic_pkgr import BinaryOverrides, SemVer, Binary, BinProvider - -from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, apt - -from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES +from abx_plugin_default_binproviders import apt +from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES from .config import get_ldap_lib @@ -39,10 +36,10 @@ def get_LDAP_LIB_version(): return LDAP_LIB and SemVer(LDAP_LIB.__version__) -class LdapBinary(BaseBinary): +class LdapBinary(Binary): name: str = 'ldap' description: str = 'LDAP Authentication' - binproviders_supported: List[InstanceOf[BaseBinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt] + binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt] overrides: BinaryOverrides = { LIB_PIP_BINPROVIDER.name: { diff --git a/packages/abx-plugin-ldap-auth/config.py b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py similarity index 96% rename from packages/abx-plugin-ldap-auth/config.py rename to archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py index 2094dc68..451c9da8 100644 --- a/packages/abx-plugin-ldap-auth/config.py +++ b/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_auth.ldap' +__package__ = 'abx_plugin_ldap_auth' 
import sys from typing import Dict, List, Optional -from pydantic import Field, model_validator, computed_field +from pydantic import Field, computed_field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet LDAP_LIB = None LDAP_SEARCH = None diff --git a/archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml b/archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml new file mode 100644 index 00000000..a89d0cbc --- /dev/null +++ b/archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "abx-plugin-ldap-auth" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-django>=0.1.0", +] + + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + + +[project.entry-points.abx] +abx_plugin_ldap_auth = "abx_plugin_ldap_auth" diff --git a/packages/abx-plugin-mercury-extractor/README.md b/archivebox/vendor/abx-plugin-mercury/README.md similarity index 100% rename from packages/abx-plugin-mercury-extractor/README.md rename to archivebox/vendor/abx-plugin-mercury/README.md diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py new file mode 100644 index 00000000..7b6fcfd6 --- /dev/null +++ b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py @@ -0,0 +1,29 @@ +__package__ = 'abx_plugin_mercury' +__label__ = 'Postlight Parser' +__homepage__ = 'https://github.com/postlight/mercury-parser' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import MERCURY_CONFIG + + return { + 'MERCURY_CONFIG': MERCURY_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import MERCURY_BINARY + + return { + 'mercury': MERCURY_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import MERCURY_EXTRACTOR + + return { + 'mercury': MERCURY_EXTRACTOR, + } diff --git a/packages/abx-plugin-mercury-extractor/binaries.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/binaries.py similarity index 78% rename from packages/abx-plugin-mercury-extractor/binaries.py rename to archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/binaries.py index b07055fd..f015a7ca 100644 --- a/packages/abx-plugin-mercury-extractor/binaries.py +++ b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/binaries.py @@ -1,18 +1,18 @@ -__package__ = 'plugins_extractor.mercury' +__package__ = 'abx_plugin_mercury' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath +from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary -from abx.archivebox.base_binary import BaseBinary, env +from abx_plugin_default_binproviders import env -from archivebox.plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER +from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER from .config import MERCURY_CONFIG -class MercuryBinary(BaseBinary): +class MercuryBinary(Binary): name: BinName = MERCURY_CONFIG.MERCURY_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] diff --git a/packages/abx-plugin-mercury-extractor/config.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/config.py similarity index 90% rename from 
packages/abx-plugin-mercury-extractor/config.py rename to archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/config.py index 49c92b73..00fa82a4 100644 --- a/packages/abx-plugin-mercury-extractor/config.py +++ b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/config.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_extractor.mercury' +__package__ = 'abx_plugin_mercury' from typing import List, Optional from pathlib import Path from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py new file mode 100644 index 00000000..36a17f3a --- /dev/null +++ b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py @@ -0,0 +1,17 @@ +__package__ = 'abx_plugin_mercury' + +# from pathlib import Path + +# from .binaries import MERCURY_BINARY + + + +# class MercuryExtractor(BaseExtractor): +# name: ExtractorName = 'mercury' +# binary: str = MERCURY_BINARY.name + +# def get_output_path(self, snapshot) -> Path | None: +# return snapshot.link_dir / 'mercury' / 'content.html' + + +# MERCURY_EXTRACTOR = MercuryExtractor() diff --git a/archivebox/extractors/mercury.py b/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/mercury.py similarity index 100% rename from archivebox/extractors/mercury.py rename to archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/mercury.py diff --git a/archivebox/vendor/abx-plugin-mercury/pyproject.toml b/archivebox/vendor/abx-plugin-mercury/pyproject.toml new file mode 100644 index 00000000..c740008b --- /dev/null +++ b/archivebox/vendor/abx-plugin-mercury/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-plugin-mercury" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_mercury = "abx_plugin_mercury" diff --git a/packages/abx-plugin-npm-binprovider/README.md b/archivebox/vendor/abx-plugin-npm/README.md similarity index 100% rename from packages/abx-plugin-npm-binprovider/README.md rename to archivebox/vendor/abx-plugin-npm/README.md diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/__init__.py similarity index 86% rename from packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py rename to archivebox/vendor/abx-plugin-npm/abx_plugin_npm/__init__.py index 3901516e..d1f56f35 100644 --- a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/__init__.py +++ b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/__init__.py @@ -1,5 +1,3 @@ -__package__ = 'abx_plugin_npm_binprovider' -__id__ = 'npm' __label__ = 'NPM' __author__ = 'ArchiveBox' __homepage__ = 'https://www.npmjs.com/' @@ -10,9 +8,8 @@ import abx @abx.hookimpl def get_CONFIG(): from .config import NPM_CONFIG - return { - __id__: NPM_CONFIG, + 'NPM_CONFIG': NPM_CONFIG, } @abx.hookimpl diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binaries.py b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binaries.py similarity index 100% rename from 
packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binaries.py rename to archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binaries.py diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binproviders.py similarity index 94% rename from packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py rename to archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binproviders.py index e0b26a90..dd56e3a9 100644 --- a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/binproviders.py +++ b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binproviders.py @@ -26,8 +26,7 @@ class LibNpmBinProvider(NpmProvider): def setup(self) -> None: # update paths from config at runtime - LIB_DIR = abx.pm.hook.get_CONFIG().LIB_DIR - + LIB_DIR = abx.pm.hook.get_LIB_DIR() self.npm_prefix = LIB_DIR / 'npm' self.PATH = f'{LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}' diff --git a/packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/config.py b/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/config.py similarity index 100% rename from packages/abx-plugin-npm-binprovider/abx_plugin_npm_binprovider/config.py rename to archivebox/vendor/abx-plugin-npm/abx_plugin_npm/config.py diff --git a/packages/abx-plugin-npm-binprovider/pyproject.toml b/archivebox/vendor/abx-plugin-npm/pyproject.toml similarity index 81% rename from packages/abx-plugin-npm-binprovider/pyproject.toml rename to archivebox/vendor/abx-plugin-npm/pyproject.toml index 5d614f90..1371b2c4 100644 --- a/packages/abx-plugin-npm-binprovider/pyproject.toml +++ b/archivebox/vendor/abx-plugin-npm/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "abx-plugin-npm-binprovider" +name = "abx-plugin-npm" version = "2024.10.24" description = "NPM binary provider plugin for ABX" readme = "README.md" @@ -17,4 +17,4 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project.entry-points.abx] -abx_plugin_npm_binprovider = "abx_plugin_npm_binprovider" +abx_plugin_npm = "abx_plugin_npm" diff --git a/packages/abx-plugin-pip-binprovider/README.md b/archivebox/vendor/abx-plugin-pip/README.md similarity index 100% rename from packages/abx-plugin-pip-binprovider/README.md rename to archivebox/vendor/abx-plugin-pip/README.md diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/.plugin_order b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/.plugin_order similarity index 100% rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/.plugin_order rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/.plugin_order diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/__init__.py similarity index 90% rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/__init__.py index 8445055f..eebcdb5b 100644 --- a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/__init__.py +++ b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/__init__.py @@ -1,5 +1,4 @@ -__package__ = 'abx_plugin_pip_binprovider' -__id__ = 'pip' +__package__ = 'abx_plugin_pip' __label__ = 'PIP' import abx @@ -10,7 +9,7 @@ def get_CONFIG(): from .config import PIP_CONFIG return { - __id__: PIP_CONFIG + 'PIP_CONFIG': PIP_CONFIG } @abx.hookimpl(tryfirst=True) diff --git 
a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binaries.py similarity index 99% rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binaries.py index b1974250..18e5f34f 100644 --- a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binaries.py +++ b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binaries.py @@ -1,4 +1,4 @@ -__package__ = 'abx_plugin_pip_binprovider' +__package__ = 'abx_plugin_pip' import sys from pathlib import Path diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binproviders.py similarity index 98% rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binproviders.py index 1c245b62..c29798b0 100644 --- a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/binproviders.py +++ b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binproviders.py @@ -58,7 +58,7 @@ class LibPipBinProvider(PipProvider): def setup(self) -> None: # update venv path to match most up-to-date LIB_DIR based on runtime config - LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR + LIB_DIR = abx.pm.hook.get_LIB_DIR() self.pip_venv = LIB_DIR / 'pip' / 'venv' super().setup() diff --git a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/config.py b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/config.py similarity index 86% rename from packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/config.py rename to archivebox/vendor/abx-plugin-pip/abx_plugin_pip/config.py index 26cf0f8e..f7464810 100644 --- a/packages/abx-plugin-pip-binprovider/abx_plugin_pip_binprovider/config.py +++ b/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/config.py @@ -3,7 +3,7 @@ __package__ = 'pip' from typing import List, Optional from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class PipDependencyConfigs(BaseConfigSet): diff --git a/packages/abx-plugin-pip-binprovider/pyproject.toml b/archivebox/vendor/abx-plugin-pip/pyproject.toml similarity index 82% rename from packages/abx-plugin-pip-binprovider/pyproject.toml rename to archivebox/vendor/abx-plugin-pip/pyproject.toml index 3f6364e0..03f88d0b 100644 --- a/packages/abx-plugin-pip-binprovider/pyproject.toml +++ b/archivebox/vendor/abx-plugin-pip/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "abx-plugin-pip-binprovider" +name = "abx-plugin-pip" version = "2024.10.24" description = "Add your description here" readme = "README.md" @@ -19,4 +19,4 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project.entry-points.abx] -abx_plugin_pip_binprovider = "abx_plugin_pip_binprovider" +abx_plugin_pip = "abx_plugin_pip" diff --git a/packages/abx-plugin-playwright-binprovider/README.md b/archivebox/vendor/abx-plugin-playwright/README.md similarity index 100% rename from packages/abx-plugin-playwright-binprovider/README.md rename to archivebox/vendor/abx-plugin-playwright/README.md diff --git a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/__init__.py similarity index 78% rename from packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py rename to 
archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/__init__.py index 557f12c0..6d3ed715 100644 --- a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/__init__.py +++ b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/__init__.py @@ -1,7 +1,4 @@ -__package__ = 'abx_plugin_playwright_binprovider' -__id__ = 'playwright' __label__ = 'Playwright' -__author__ = 'ArchiveBox' __homepage__ = 'https://github.com/microsoft/playwright-python' import abx @@ -10,9 +7,8 @@ import abx @abx.hookimpl def get_CONFIG(): from .config import PLAYWRIGHT_CONFIG - return { - __id__: PLAYWRIGHT_CONFIG + 'PLAYWRIGHT_CONFIG': PLAYWRIGHT_CONFIG } @abx.hookimpl diff --git a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binaries.py similarity index 73% rename from packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py rename to archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binaries.py index 333da054..4b77d9d4 100644 --- a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binaries.py +++ b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binaries.py @@ -1,4 +1,4 @@ -__package__ = 'abx_plugin_playwright_binprovider' +__package__ = 'abx_plugin_playwright' from typing import List @@ -6,7 +6,7 @@ from pydantic import InstanceOf from pydantic_pkgr import BinName, BinProvider, Binary -from abx_plugin_pip_binprovider.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER +from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER from abx_plugin_default_binproviders import env from .config import PLAYWRIGHT_CONFIG diff --git a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binproviders.py similarity index 98% rename from packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py rename to archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binproviders.py index 8e472988..6bc44815 100644 --- a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/binproviders.py +++ b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binproviders.py @@ -1,4 +1,4 @@ -__package__ = 'abx_plugin_playwright_binprovider' +__package__ = 'abx_plugin_playwright' import os import shutil @@ -59,7 +59,7 @@ class PlaywrightBinProvider(BinProvider): def setup(self) -> None: # update paths from config at runtime - LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR + LIB_DIR = abx.pm.hook.get_LIB_DIR() self.PATH = f"{LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}" diff --git a/packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/config.py b/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/config.py similarity index 100% rename from packages/abx-plugin-playwright-binprovider/abx_plugin_playwright_binprovider/config.py rename to archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/config.py diff --git a/packages/abx-plugin-playwright-binprovider/pyproject.toml b/archivebox/vendor/abx-plugin-playwright/pyproject.toml similarity index 72% rename from packages/abx-plugin-playwright-binprovider/pyproject.toml rename to archivebox/vendor/abx-plugin-playwright/pyproject.toml index a6c8937b..0ad0d995 100644 --- 
a/packages/abx-plugin-playwright-binprovider/pyproject.toml +++ b/archivebox/vendor/abx-plugin-playwright/pyproject.toml @@ -1,6 +1,6 @@ [project] -name = "abx-plugin-playwright-binprovider" -version = "2024.10.24" +name = "abx-plugin-playwright" +version = "2024.10.28" description = "Add your description here" readme = "README.md" requires-python = ">=3.10" @@ -17,4 +17,4 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project.entry-points.abx] -abx_plugin_playwright_binprovider = "abx_plugin_playwright_binprovider" +abx_plugin_playwright = "abx_plugin_playwright" diff --git a/packages/abx-plugin-pocket-extractor/README.md b/archivebox/vendor/abx-plugin-pocket/README.md similarity index 100% rename from packages/abx-plugin-pocket-extractor/README.md rename to archivebox/vendor/abx-plugin-pocket/README.md diff --git a/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py b/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py new file mode 100644 index 00000000..09e5dc8f --- /dev/null +++ b/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py @@ -0,0 +1,18 @@ +__package__ = 'abx_plugin_pocket' +__label__ = 'Pocket' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import POCKET_CONFIG + + return { + 'POCKET_CONFIG': POCKET_CONFIG + } + +@abx.hookimpl +def ready(): + from .config import POCKET_CONFIG + POCKET_CONFIG.validate() diff --git a/packages/abx-plugin-pocket-extractor/config.py b/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/config.py similarity index 76% rename from packages/abx-plugin-pocket-extractor/config.py rename to archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/config.py index 7866a1f6..2db072a1 100644 --- a/packages/abx-plugin-pocket-extractor/config.py +++ b/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/config.py @@ -1,10 +1,10 @@ -__package__ = 'plugins_extractor.pocket' +__package__ = 'abx_plugin_pocket' from typing import Dict from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class PocketConfig(BaseConfigSet): diff --git a/archivebox/vendor/abx-plugin-pocket/pyproject.toml b/archivebox/vendor/abx-plugin-pocket/pyproject.toml new file mode 100644 index 00000000..999fa098 --- /dev/null +++ b/archivebox/vendor/abx-plugin-pocket/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-pocket" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "pocket>=0.3.6", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_pocket = "abx_plugin_pocket" diff --git a/packages/abx-plugin-puppeteer-binprovider/README.md b/archivebox/vendor/abx-plugin-puppeteer/README.md similarity index 100% rename from packages/abx-plugin-puppeteer-binprovider/README.md rename to archivebox/vendor/abx-plugin-puppeteer/README.md diff --git a/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py new file mode 100644 index 00000000..1ee876d6 --- /dev/null +++ b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py @@ -0,0 +1,30 @@ +__package__ = 'abx_plugin_puppeteer' +__label__ = 'Puppeteer' +__homepage__ = 'https://github.com/puppeteer/puppeteer' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config 
import PUPPETEER_CONFIG + + return { + 'PUPPETEER_CONFIG': PUPPETEER_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import PUPPETEER_BINARY + + return { + 'puppeteer': PUPPETEER_BINARY, + } + +@abx.hookimpl +def get_BINPROVIDERS(): + from .binproviders import PUPPETEER_BINPROVIDER + + return { + 'puppeteer': PUPPETEER_BINPROVIDER, + } diff --git a/packages/abx-plugin-puppeteer-binprovider/binaries.py b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py similarity index 54% rename from packages/abx-plugin-puppeteer-binprovider/binaries.py rename to archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py index 7e592bba..8afd484f 100644 --- a/packages/abx-plugin-puppeteer-binprovider/binaries.py +++ b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py @@ -1,20 +1,20 @@ -__package__ = 'plugins_pkg.puppeteer' +__package__ = 'abx_plugin_puppeteer' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName +from pydantic_pkgr import BinProvider, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env +from abx_plugin_default_binproviders import env -from plugins_pkg.npm.binproviders import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER +from abx_plugin_npm.binproviders import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER ###################### Config ########################## -class PuppeteerBinary(BaseBinary): +class PuppeteerBinary(Binary): name: BinName = "puppeteer" binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] diff --git a/packages/abx-plugin-puppeteer-binprovider/binproviders.py b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py similarity index 93% rename from packages/abx-plugin-puppeteer-binprovider/binproviders.py rename to archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py index 0fa9ca33..e7b697bd 100644 --- a/packages/abx-plugin-puppeteer-binprovider/binproviders.py +++ b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py @@ -1,5 +1,3 @@ -__package__ = 'plugins_pkg.puppeteer' - import os import platform from pathlib import Path @@ -7,6 +5,7 @@ from typing import List, Optional, Dict, ClassVar from pydantic import Field from pydantic_pkgr import ( + BinProvider, BinName, BinProviderName, BinProviderOverrides, @@ -15,15 +14,15 @@ from pydantic_pkgr import ( HostBinPath, ) +import abx + from archivebox.config import CONSTANTS from archivebox.config.permissions import ARCHIVEBOX_USER -from abx.archivebox.base_binary import BaseBinProvider - -from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER +from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER -class PuppeteerBinProvider(BaseBinProvider): +class PuppeteerBinProvider(BinProvider): name: BinProviderName = "puppeteer" INSTALLER_BIN: BinName = "npx" @@ -44,9 +43,10 @@ class PuppeteerBinProvider(BaseBinProvider): def setup(self) -> None: # update paths from config, don't do this lazily because we dont want to import archivebox.config.common at import-time # we want to avoid depending on archivebox from abx code if at all possible - from archivebox.config.common import STORAGE_CONFIG - self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers' - self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin') + LIB_DIR = abx.pm.hook.get_LIB_DIR() + BIN_DIR = abx.pm.hook.get_BIN_DIR() + self.puppeteer_browsers_dir = LIB_DIR / 'browsers' + self.PATH = str(BIN_DIR) assert 
SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized" diff --git a/packages/abx-plugin-puppeteer-binprovider/config.py b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py similarity index 79% rename from packages/abx-plugin-puppeteer-binprovider/config.py rename to archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py index b76d0779..f09e7062 100644 --- a/packages/abx-plugin-puppeteer-binprovider/config.py +++ b/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py @@ -1,7 +1,7 @@ -__package__ = 'plugins_pkg.puppeteer' +__package__ = 'abx_plugin_puppeteer' -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet ###################### Config ########################## diff --git a/archivebox/vendor/abx-plugin-puppeteer/pyproject.toml b/archivebox/vendor/abx-plugin-puppeteer/pyproject.toml new file mode 100644 index 00000000..2633b481 --- /dev/null +++ b/archivebox/vendor/abx-plugin-puppeteer/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "abx-plugin-puppeteer" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_puppeteer = "abx_plugin_puppeteer" diff --git a/packages/abx-plugin-readability-extractor/README.md b/archivebox/vendor/abx-plugin-readability/README.md similarity index 100% rename from packages/abx-plugin-readability-extractor/README.md rename to archivebox/vendor/abx-plugin-readability/README.md diff --git a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py new file mode 100644 index 00000000..cb7d35af --- /dev/null +++ b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py @@ -0,0 +1,30 @@ +__package__ = 'abx_plugin_readability' +__label__ = 'Readability' +__homepage__ = 'https://github.com/ArchiveBox/readability-extractor' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import READABILITY_CONFIG + + return { + 'READABILITY_CONFIG': READABILITY_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import READABILITY_BINARY + + return { + 'readability': READABILITY_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import READABILITY_EXTRACTOR + + return { + 'readability': READABILITY_EXTRACTOR, + } diff --git a/packages/abx-plugin-readability-extractor/binaries.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/binaries.py similarity index 69% rename from packages/abx-plugin-readability-extractor/binaries.py rename to archivebox/vendor/abx-plugin-readability/abx_plugin_readability/binaries.py index 43343924..65ecf57c 100644 --- a/packages/abx-plugin-readability-extractor/binaries.py +++ b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/binaries.py @@ -1,20 +1,19 @@ -__package__ = 'plugins_extractor.readability' +__package__ = 'abx_plugin_readability' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinaryOverrides, BinName +from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName -from abx.archivebox.base_binary import BaseBinary, env - -from 
plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER +from abx_plugin_default_binproviders import env +from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER from .config import READABILITY_CONFIG READABILITY_PACKAGE_NAME = 'github:ArchiveBox/readability-extractor' -class ReadabilityBinary(BaseBinary): +class ReadabilityBinary(Binary): name: BinName = READABILITY_CONFIG.READABILITY_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] diff --git a/packages/abx-plugin-readability-extractor/config.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/config.py similarity index 83% rename from packages/abx-plugin-readability-extractor/config.py rename to archivebox/vendor/abx-plugin-readability/abx_plugin_readability/config.py index 8066d56c..726295fe 100644 --- a/packages/abx-plugin-readability-extractor/config.py +++ b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/config.py @@ -1,8 +1,6 @@ -__package__ = 'plugins_extractor.readability' - from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG diff --git a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py new file mode 100644 index 00000000..64d712ed --- /dev/null +++ b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py @@ -0,0 +1,19 @@ +# __package__ = 'abx_plugin_readability' + +# from pathlib import Path + +# from pydantic_pkgr import BinName + + +# from .binaries import READABILITY_BINARY + + +# class ReadabilityExtractor(BaseExtractor): +# name: str = 'readability' +# binary: BinName = READABILITY_BINARY.name + +# def get_output_path(self, snapshot) -> Path: +# return Path(snapshot.link_dir) / 'readability' / 'content.html' + + +# READABILITY_EXTRACTOR = ReadabilityExtractor() diff --git a/archivebox/extractors/readability.py b/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/readability.py similarity index 100% rename from archivebox/extractors/readability.py rename to archivebox/vendor/abx-plugin-readability/abx_plugin_readability/readability.py diff --git a/archivebox/vendor/abx-plugin-readability/pyproject.toml b/archivebox/vendor/abx-plugin-readability/pyproject.toml new file mode 100644 index 00000000..59a2db64 --- /dev/null +++ b/archivebox/vendor/abx-plugin-readability/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-plugin-readability" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_readability = "abx_plugin_readability" diff --git a/packages/abx-plugin-readwise-extractor/README.md b/archivebox/vendor/abx-plugin-readwise/README.md similarity index 100% rename from packages/abx-plugin-readwise-extractor/README.md rename to archivebox/vendor/abx-plugin-readwise/README.md diff --git a/archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py b/archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py new file mode 100644 index 00000000..ea31cd14 --- /dev/null +++ b/archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py @@ -0,0 +1,35 @@ 
+__package__ = 'abx_plugin_readwise_extractor'
+__id__ = 'abx_plugin_readwise_extractor'
+__label__ = 'Readwise API'
+__version__ = '2024.10.27'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
+__dependencies__ = []
+
+import abx
+
+from typing import Dict
+from pathlib import Path
+
+from pydantic import Field
+
+from abx_spec_config.base_configset import BaseConfigSet
+
+SOURCES_DIR = abx.pm.hook.get_CONFIG().SOURCES_DIR
+
+
+class ReadwiseConfig(BaseConfigSet):
+    READWISE_DB_PATH: Path = Field(default=SOURCES_DIR / "readwise_reader_api.db")
+    READWISE_READER_TOKENS: Dict[str, str] = Field(default_factory=dict)   # {<user>: <token>, ...}
+
+
+@abx.hookimpl
+def get_CONFIG():
+    return {
+        __id__: ReadwiseConfig()
+    }
+
+@abx.hookimpl
+def ready():
+    READWISE_CONFIG = abx.pm.hook.get_CONFIG()[__id__]
+    READWISE_CONFIG.validate()
diff --git a/archivebox/vendor/abx-plugin-readwise/pyproject.toml b/archivebox/vendor/abx-plugin-readwise/pyproject.toml
new file mode 100644
index 00000000..c85d489f
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-readwise/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-readwise"
+version = "2024.10.28"
+description = "Readwise API Extractor"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "abx-spec-config>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_readwise = "abx_plugin_readwise"
+
diff --git a/packages/abx-plugin-ripgrep-search/README.md b/archivebox/vendor/abx-plugin-ripgrep-search/README.md
similarity index 100%
rename from packages/abx-plugin-ripgrep-search/README.md
rename to archivebox/vendor/abx-plugin-ripgrep-search/README.md
diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py
new file mode 100644
index 00000000..91347523
--- /dev/null
+++ b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py
@@ -0,0 +1,31 @@
+__package__ = 'abx_plugin_ripgrep_search'
+__label__ = 'Ripgrep Search'
+__homepage__ = 'https://github.com/BurntSushi/ripgrep'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+    from .config import RIPGREP_CONFIG
+
+    return {
+        'RIPGREP_CONFIG': RIPGREP_CONFIG
+    }
+
+
+@abx.hookimpl
+def get_BINARIES():
+    from .binaries import RIPGREP_BINARY
+
+    return {
+        'ripgrep': RIPGREP_BINARY
+    }
+
+
+@abx.hookimpl
+def get_SEARCHBACKENDS():
+    from .searchbackend import RIPGREP_SEARCH_BACKEND
+
+    return {
+        'ripgrep': RIPGREP_SEARCH_BACKEND,
+    }
diff --git a/packages/abx-plugin-ripgrep-search/binaries.py b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
similarity index 65%
rename from packages/abx-plugin-ripgrep-search/binaries.py
rename to archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
index 710a1ef0..ef9217ad 100644
--- a/packages/abx-plugin-ripgrep-search/binaries.py
+++ b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
@@ -1,17 +1,17 @@ -__package__ = 'plugins_search.ripgrep' +__package__ = 'abx_plugin_ripgrep_search' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinaryOverrides, BinName +from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from 
abx_plugin_default_binproviders import apt, brew, env from .config import RIPGREP_CONFIG -class RipgrepBinary(BaseBinary): +class RipgrepBinary(Binary): name: BinName = RIPGREP_CONFIG.RIPGREP_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/packages/abx-plugin-ripgrep-search/config.py b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py similarity index 89% rename from packages/abx-plugin-ripgrep-search/config.py rename to archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py index 726c21e8..e0fd3b28 100644 --- a/packages/abx-plugin-ripgrep-search/config.py +++ b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_search.ripgrep' +__package__ = 'abx_plugin_ripgrep_search' from pathlib import Path from typing import List from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config import CONSTANTS from archivebox.config.common import SEARCH_BACKEND_CONFIG diff --git a/packages/abx-plugin-ripgrep-search/searchbackend.py b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py similarity index 93% rename from packages/abx-plugin-ripgrep-search/searchbackend.py rename to archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py index 3c30af85..ed3965ba 100644 --- a/packages/abx-plugin-ripgrep-search/searchbackend.py +++ b/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_search.ripgrep' +__package__ = 'abx_plugin_ripgrep_search' import re import subprocess from typing import List, Iterable -from abx.archivebox.base_searchbackend import BaseSearchBackend +from abx_spec_searchbackend import BaseSearchBackend from .binaries import RIPGREP_BINARY from .config import RIPGREP_CONFIG diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml b/archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml new file mode 100644 index 00000000..67245c48 --- /dev/null +++ b/archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-ripgrep-search" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-searchbackend>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_ripgrep_search = "abx_plugin_ripgrep_search" diff --git a/packages/abx-plugin-singlefile-extractor/README.md b/archivebox/vendor/abx-plugin-singlefile/README.md similarity index 100% rename from packages/abx-plugin-singlefile-extractor/README.md rename to archivebox/vendor/abx-plugin-singlefile/README.md diff --git a/packages/abx-plugin-singlefile-extractor/__init__.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py similarity index 53% rename from packages/abx-plugin-singlefile-extractor/__init__.py rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py index cd72adb8..ddfb4236 100644 --- a/packages/abx-plugin-singlefile-extractor/__init__.py +++ b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py @@ -1,32 +1,16 @@ -__package__ = 'plugins_extractor.singlefile' -__label__ = 'singlefile' 
-__version__ = '2024.10.14' -__author__ = 'ArchiveBox' +__package__ = 'abx_plugin_singlefile' +__label__ = 'Singlefile' __homepage__ = 'https://github.com/gildas-lormeau/singlefile' -__dependencies__ = ['npm'] import abx -@abx.hookimpl -def get_PLUGIN(): - return { - 'singlefile': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import SINGLEFILE_CONFIG return { - 'singlefile': SINGLEFILE_CONFIG + 'SINGLEFILE_CONFIG': SINGLEFILE_CONFIG } @abx.hookimpl diff --git a/packages/abx-plugin-singlefile-extractor/binaries.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py similarity index 84% rename from packages/abx-plugin-singlefile-extractor/binaries.py rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py index 0c8a1bab..7af784a3 100644 --- a/packages/abx-plugin-singlefile-extractor/binaries.py +++ b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py @@ -1,13 +1,10 @@ -__package__ = 'plugins_extractor.singlefile' - from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, bin_abspath +from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath -from abx.archivebox.base_binary import BaseBinary, env - -from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER +from abx_plugin_default_binproviders import env +from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER from .config import SINGLEFILE_CONFIG @@ -16,7 +13,7 @@ SINGLEFILE_MIN_VERSION = '1.1.54' SINGLEFILE_MAX_VERSION = '1.1.60' -class SinglefileBinary(BaseBinary): +class SinglefileBinary(Binary): name: BinName = SINGLEFILE_CONFIG.SINGLEFILE_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] diff --git a/packages/abx-plugin-singlefile-extractor/config.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/config.py similarity index 88% rename from packages/abx-plugin-singlefile-extractor/config.py rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/config.py index 7d27031e..0d2164ba 100644 --- a/packages/abx-plugin-singlefile-extractor/config.py +++ b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/config.py @@ -1,11 +1,9 @@ -__package__ = 'plugins_extractor.singlefile' - from pathlib import Path from typing import List, Optional from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG diff --git a/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py new file mode 100644 index 00000000..07b674ac --- /dev/null +++ b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py @@ -0,0 +1,18 @@ +__package__ = 'abx_plugin_singlefile' + +# from pathlib import Path + +# from pydantic_pkgr import BinName + +# from .binaries import SINGLEFILE_BINARY + + +# class SinglefileExtractor(BaseExtractor): +# name: str = 'singlefile' +# binary: BinName = SINGLEFILE_BINARY.name + +# def get_output_path(self, snapshot) -> Path: +# return Path(snapshot.link_dir) / 'singlefile.html' + + +# SINGLEFILE_EXTRACTOR = 
SinglefileExtractor() diff --git a/packages/abx-plugin-singlefile-extractor/models.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/models.py similarity index 100% rename from packages/abx-plugin-singlefile-extractor/models.py rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/models.py diff --git a/archivebox/extractors/singlefile.py b/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py similarity index 100% rename from archivebox/extractors/singlefile.py rename to archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py diff --git a/archivebox/vendor/abx-plugin-singlefile/pyproject.toml b/archivebox/vendor/abx-plugin-singlefile/pyproject.toml new file mode 100644 index 00000000..7cecd40a --- /dev/null +++ b/archivebox/vendor/abx-plugin-singlefile/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "abx-plugin-singlefile" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_singlefile = "abx_plugin_singlefile" diff --git a/packages/abx-plugin-sonic-search/README.md b/archivebox/vendor/abx-plugin-sonic-search/README.md similarity index 100% rename from packages/abx-plugin-sonic-search/README.md rename to archivebox/vendor/abx-plugin-sonic-search/README.md diff --git a/packages/abx-plugin-sonic-search/__init__.py b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py similarity index 53% rename from packages/abx-plugin-sonic-search/__init__.py rename to archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py index a899679b..1a92a8d2 100644 --- a/packages/abx-plugin-sonic-search/__init__.py +++ b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py @@ -1,32 +1,16 @@ -__package__ = 'plugins_search.sonic' -__label__ = 'sonic' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' +__package__ = 'abx_plugin_sonic_search' +__label__ = 'Sonic Search' __homepage__ = 'https://github.com/valeriansaliou/sonic' -__dependencies__ = [] import abx -@abx.hookimpl -def get_PLUGIN(): - return { - 'sonic': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import SONIC_CONFIG return { - 'sonic': SONIC_CONFIG + 'SONIC_CONFIG': SONIC_CONFIG } diff --git a/packages/abx-plugin-sonic-search/binaries.py b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py similarity index 80% rename from packages/abx-plugin-sonic-search/binaries.py rename to archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py index eab987c5..2e8fb536 100644 --- a/packages/abx-plugin-sonic-search/binaries.py +++ b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py @@ -1,16 +1,16 @@ -__package__ = 'plugins_search.sonic' +__package__ = 'abx_plugin_sonic_search' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinaryOverrides, BinName +from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, brew +from abx_plugin_default_binproviders 
import brew, env from .config import SONIC_CONFIG -class SonicBinary(BaseBinary): +class SonicBinary(Binary): name: BinName = SONIC_CONFIG.SONIC_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [brew, env] # TODO: add cargo diff --git a/packages/abx-plugin-sonic-search/config.py b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py similarity index 93% rename from packages/abx-plugin-sonic-search/config.py rename to archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py index d54ed568..97cc7b3a 100644 --- a/packages/abx-plugin-sonic-search/config.py +++ b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py @@ -1,10 +1,10 @@ -__package__ = 'plugins_search.sonic' +__package__ = 'abx_plugin_sonic_search' import sys from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import SEARCH_BACKEND_CONFIG diff --git a/packages/abx-plugin-sonic-search/searchbackend.py b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py similarity index 97% rename from packages/abx-plugin-sonic-search/searchbackend.py rename to archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py index 1662e5b2..a63a0132 100644 --- a/packages/abx-plugin-sonic-search/searchbackend.py +++ b/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py @@ -2,7 +2,7 @@ __package__ = 'plugins_search.sonic' from typing import List, Generator, cast -from abx.archivebox.base_searchbackend import BaseSearchBackend +from abx_spec_searchbackend import BaseSearchBackend from .config import SONIC_CONFIG, SONIC_LIB diff --git a/archivebox/vendor/abx-plugin-sonic-search/pyproject.toml b/archivebox/vendor/abx-plugin-sonic-search/pyproject.toml new file mode 100644 index 00000000..b6551b52 --- /dev/null +++ b/archivebox/vendor/abx-plugin-sonic-search/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "abx-plugin-sonic-search" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-spec-searchbackend>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_sonic_search = "abx_plugin_sonic_search" diff --git a/packages/abx-plugin-sqlitefts-search/README.md b/archivebox/vendor/abx-plugin-sqlitefts-search/README.md similarity index 100% rename from packages/abx-plugin-sqlitefts-search/README.md rename to archivebox/vendor/abx-plugin-sqlitefts-search/README.md diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py new file mode 100644 index 00000000..5d5ed6de --- /dev/null +++ b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py @@ -0,0 +1,21 @@ +__package__ = 'abx_plugin_sqlitefts_search' +__label__ = 'SQLiteFTS Search' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import SQLITEFTS_CONFIG + + return { + 'SQLITEFTS_CONFIG': SQLITEFTS_CONFIG + } + + +@abx.hookimpl +def get_SEARCHBACKENDS(): + from .searchbackend import SQLITEFTS_SEARCH_BACKEND + + return { + 'sqlitefts': SQLITEFTS_SEARCH_BACKEND, + } diff --git 
a/packages/abx-plugin-sqlitefts-search/config.py b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py similarity index 96% rename from packages/abx-plugin-sqlitefts-search/config.py rename to archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py index 5690dc6c..789ff114 100644 --- a/packages/abx-plugin-sqlitefts-search/config.py +++ b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py @@ -1,4 +1,4 @@ -__package__ = 'plugins_search.sqlitefts' +__package__ = 'abx_plugin_sqlitefts_search' import sys import sqlite3 @@ -8,7 +8,7 @@ from django.core.exceptions import ImproperlyConfigured from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import SEARCH_BACKEND_CONFIG diff --git a/packages/abx-plugin-sqlitefts-search/searchbackend.py b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py similarity index 98% rename from packages/abx-plugin-sqlitefts-search/searchbackend.py rename to archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py index 630bdd4c..2ae7c9cf 100644 --- a/packages/abx-plugin-sqlitefts-search/searchbackend.py +++ b/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py @@ -1,10 +1,10 @@ -__package__ = 'plugins_search.sqlitefts' +__package__ = 'abx_plugin_sqlitefts_search' import codecs import sqlite3 from typing import List, Iterable -from abx.archivebox.base_searchbackend import BaseSearchBackend +from abx_spec_searchbackend import BaseSearchBackend from .config import SQLITEFTS_CONFIG diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml b/archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml new file mode 100644 index 00000000..abc6181a --- /dev/null +++ b/archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-sqlitefts-search" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-searchbackend>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_sqlitefts_search = "abx_plugin_sqlitefts_search" diff --git a/packages/abx-plugin-wget-extractor/README.md b/archivebox/vendor/abx-plugin-title/README.md similarity index 100% rename from packages/abx-plugin-wget-extractor/README.md rename to archivebox/vendor/abx-plugin-title/README.md diff --git a/archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py b/archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py new file mode 100644 index 00000000..d3e5cac5 --- /dev/null +++ b/archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py @@ -0,0 +1,9 @@ +import abx + +# @abx.hookimpl +# def get_CONFIG(): +# from .config import TITLE_EXTRACTOR_CONFIG + +# return { +# 'title_extractor': TITLE_EXTRACTOR_CONFIG +# } diff --git a/archivebox/extractors/title.py b/archivebox/vendor/abx-plugin-title/abx_plugin_title/extractor.py similarity index 97% rename from archivebox/extractors/title.py rename to archivebox/vendor/abx-plugin-title/abx_plugin_title/extractor.py index ceefb699..a8ef52cf 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/vendor/abx-plugin-title/abx_plugin_title/extractor.py @@ -11,8 
+11,8 @@ from archivebox.misc.util import ( htmldecode, dedupe, ) -from archivebox.plugins_extractor.curl.config import CURL_CONFIG -from archivebox.plugins_extractor.curl.binaries import CURL_BINARY +from abx_plugin_curl_extractor.config import CURL_CONFIG +from abx_plugin_curl_extractor.binaries import CURL_BINARY from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from ..logging_util import TimedProgress diff --git a/archivebox/vendor/abx-plugin-title/pyproject.toml b/archivebox/vendor/abx-plugin-title/pyproject.toml new file mode 100644 index 00000000..a9737b3a --- /dev/null +++ b/archivebox/vendor/abx-plugin-title/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-title" +version = "2024.10.27" +description = "Title Extractor" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-plugin-curl>=2024.10.28", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_title = "abx_plugin_title" diff --git a/packages/abx-plugin-ytdlp-extractor/README.md b/archivebox/vendor/abx-plugin-wget/README.md similarity index 100% rename from packages/abx-plugin-ytdlp-extractor/README.md rename to archivebox/vendor/abx-plugin-wget/README.md diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py new file mode 100644 index 00000000..a32987ee --- /dev/null +++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py @@ -0,0 +1,35 @@ +__package__ = 'abx_plugin_wget' +__label__ = 'WGET' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import WGET_CONFIG + + return { + 'WGET_CONFIG': WGET_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import WGET_BINARY + + return { + 'wget': WGET_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import WGET_EXTRACTOR, WARC_EXTRACTOR + + return { + 'wget': WGET_EXTRACTOR, + 'warc': WARC_EXTRACTOR, + } + +@abx.hookimpl +def ready(): + from .config import WGET_CONFIG + WGET_CONFIG.validate() diff --git a/packages/abx-plugin-wget-extractor/binaries.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/binaries.py similarity index 57% rename from packages/abx-plugin-wget-extractor/binaries.py rename to archivebox/vendor/abx-plugin-wget/abx_plugin_wget/binaries.py index 6198beac..39cbe111 100644 --- a/packages/abx-plugin-wget-extractor/binaries.py +++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/binaries.py @@ -1,17 +1,17 @@ -__package__ = 'plugins_extractor.wget' +__package__ = 'abx_plugin_wget' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName +from pydantic_pkgr import BinProvider, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx_plugin_default_binproviders import apt, brew, env from .config import WGET_CONFIG -class WgetBinary(BaseBinary): +class WgetBinary(Binary): name: BinName = WGET_CONFIG.WGET_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/packages/abx-plugin-wget-extractor/config.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/config.py similarity index 96% rename from packages/abx-plugin-wget-extractor/config.py rename to archivebox/vendor/abx-plugin-wget/abx_plugin_wget/config.py index 12edf672..1dfd1b07 100644 --- a/packages/abx-plugin-wget-extractor/config.py +++ 
b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/config.py @@ -1,12 +1,10 @@ -__package__ = 'plugins_extractor.wget' - import subprocess from typing import List, Optional from pathlib import Path from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG from archivebox.misc.logging import STDERR diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py new file mode 100644 index 00000000..4d4d0243 --- /dev/null +++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py @@ -0,0 +1,35 @@ +__package__ = 'abx_plugin_wget' + +# from pathlib import Path + +# from pydantic_pkgr import BinName + +# from .binaries import WGET_BINARY +# from .wget_util import wget_output_path + +# class WgetExtractor(BaseExtractor): +# name: ExtractorName = 'wget' +# binary: BinName = WGET_BINARY.name + +# def get_output_path(self, snapshot) -> Path | None: +# wget_index_path = wget_output_path(snapshot.as_link()) +# if wget_index_path: +# return Path(wget_index_path) +# return None + +# WGET_EXTRACTOR = WgetExtractor() + + +# class WarcExtractor(BaseExtractor): +# name: ExtractorName = 'warc' +# binary: BinName = WGET_BINARY.name + +# def get_output_path(self, snapshot) -> Path | None: +# warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz')) +# if warc_files: +# return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0] +# return None + + +# WARC_EXTRACTOR = WarcExtractor() + diff --git a/archivebox/extractors/wget.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget.py similarity index 97% rename from archivebox/extractors/wget.py rename to archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget.py index 416e797e..caaaeaf6 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_wget_extractor' import re import os @@ -17,10 +17,11 @@ from archivebox.misc.util import ( urldecode, dedupe, ) -from archivebox.plugins_extractor.wget.config import WGET_CONFIG -from archivebox.plugins_extractor.wget.binaries import WGET_BINARY -from ..logging_util import TimedProgress -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from .config import WGET_CONFIG +from .binaries import WGET_BINARY + +from archivebox.logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError def get_output_path(): diff --git a/packages/abx-plugin-wget-extractor/wget_util.py b/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget_util.py similarity index 100% rename from packages/abx-plugin-wget-extractor/wget_util.py rename to archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget_util.py diff --git a/archivebox/vendor/abx-plugin-wget/pyproject.toml b/archivebox/vendor/abx-plugin-wget/pyproject.toml new file mode 100644 index 00000000..d401e52f --- /dev/null +++ b/archivebox/vendor/abx-plugin-wget/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-wget" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = 
"hatchling.build" + +[project.entry-points.abx] +abx_plugin_wget = "abx_plugin_wget" diff --git a/packages/abx-spec-archivebox/README.md b/archivebox/vendor/abx-plugin-ytdlp/README.md similarity index 100% rename from packages/abx-spec-archivebox/README.md rename to archivebox/vendor/abx-plugin-ytdlp/README.md diff --git a/packages/abx-plugin-ytdlp-extractor/__init__.py b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py similarity index 53% rename from packages/abx-plugin-ytdlp-extractor/__init__.py rename to archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py index 1dc9ef99..5b1d9968 100644 --- a/packages/abx-plugin-ytdlp-extractor/__init__.py +++ b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py @@ -1,30 +1,15 @@ -__package__ = 'plugins_extractor.ytdlp' +__package__ = 'abx_plugin_ytdlp' __label__ = 'YT-DLP' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' __homepage__ = 'https://github.com/yt-dlp/yt-dlp' import abx - -@abx.hookimpl -def get_PLUGIN(): - return { - 'ytdlp': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import YTDLP_CONFIG return { - 'ytdlp': YTDLP_CONFIG + 'YTDLP_CONFIG': YTDLP_CONFIG } @abx.hookimpl diff --git a/packages/abx-plugin-ytdlp-extractor/binaries.py b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py similarity index 77% rename from packages/abx-plugin-ytdlp-extractor/binaries.py rename to archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py index 730de2dc..69239515 100644 --- a/packages/abx-plugin-ytdlp-extractor/binaries.py +++ b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py @@ -1,26 +1,25 @@ -__package__ = 'plugins_extractor.ytdlp' +__package__ = 'abx_plugin_ytdlp' import subprocess from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName, BinaryOverrides +from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew - -from plugins_pkg.pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER +from abx_plugin_default_binproviders import apt, brew, env +from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER from .config import YTDLP_CONFIG -class YtdlpBinary(BaseBinary): +class YtdlpBinary(Binary): name: BinName = YTDLP_CONFIG.YTDLP_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] YTDLP_BINARY = YtdlpBinary() -class FfmpegBinary(BaseBinary): +class FfmpegBinary(Binary): name: BinName = 'ffmpeg' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/packages/abx-plugin-ytdlp-extractor/config.py b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py similarity index 97% rename from packages/abx-plugin-ytdlp-extractor/config.py rename to archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py index 0082df3d..b36d19d1 100644 --- a/packages/abx-plugin-ytdlp-extractor/config.py +++ b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py @@ -4,7 +4,7 @@ from typing import List from pydantic import Field, AliasChoices -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG from 
archivebox.misc.logging import STDERR diff --git a/archivebox/extractors/media.py b/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py similarity index 100% rename from archivebox/extractors/media.py rename to archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py diff --git a/archivebox/vendor/abx-plugin-ytdlp/pyproject.toml b/archivebox/vendor/abx-plugin-ytdlp/pyproject.toml new file mode 100644 index 00000000..b45626bd --- /dev/null +++ b/archivebox/vendor/abx-plugin-ytdlp/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "abx-plugin-ytdlp" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_ytdlp = "abx_plugin_ytdlp" diff --git a/packages/abx-spec-django/README.md b/archivebox/vendor/abx-spec-archivebox/README.md similarity index 100% rename from packages/abx-spec-django/README.md rename to archivebox/vendor/abx-spec-archivebox/README.md diff --git a/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py new file mode 100644 index 00000000..ab591c96 --- /dev/null +++ b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py @@ -0,0 +1,28 @@ +__package__ = 'abx_spec_archivebox' +__order__ = 400 + +# from .effects import * +# from .events import * +# from .reads import * +# from .writes import * +# from .states import * + +from typing import cast + +import abx +from abx_spec_config import ConfigPluginSpec +from abx_spec_pydantic_pkgr import PydanticPkgrPluginSpec +from abx_spec_django import DjangoPluginSpec +from abx_spec_searchbackend import SearchBackendPluginSpec + +class ArchiveBoxPluginSpec(ConfigPluginSpec, PydanticPkgrPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec): + """ + ArchiveBox plugins can use any of the hooks from the Config, PydanticPkgr, and Django plugin specs. 
+    """
+    pass
+
+PLUGIN_SPEC = ArchiveBoxPluginSpec
+
+
+TypedPluginManager = abx.ABXPluginManager[ArchiveBoxPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/effects.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/effects.py
similarity index 100%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/effects.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/effects.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/events.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/events.py
similarity index 100%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/events.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/events.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/reads.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/reads.py
similarity index 100%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/reads.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/reads.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/states.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/states.py
similarity index 100%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/states.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/states.py
diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/writes.py b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/writes.py
similarity index 99%
rename from packages/abx-spec-archivebox/abx_spec_archivebox/writes.py
rename to archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/writes.py
index 1ca1ac7e..977543d2 100644
--- a/packages/abx-spec-archivebox/abx_spec_archivebox/writes.py
+++ b/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/writes.py
@@ -8,7 +8,6 @@ from benedict import benedict
 from django.conf import settings
 
 import abx
-from .. import pm
 
 
 @abx.hookimpl
diff --git a/packages/abx-spec-archivebox/pyproject.toml b/archivebox/vendor/abx-spec-archivebox/pyproject.toml
similarity index 100%
rename from packages/abx-spec-archivebox/pyproject.toml
rename to archivebox/vendor/abx-spec-archivebox/pyproject.toml
diff --git a/packages/abx-spec-extractor/README.md b/archivebox/vendor/abx-spec-config/README.md
similarity index 100%
rename from packages/abx-spec-extractor/README.md
rename to archivebox/vendor/abx-spec-config/README.md
diff --git a/archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py b/archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py
new file mode 100644
index 00000000..3feaab82
--- /dev/null
+++ b/archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py
@@ -0,0 +1,66 @@
+__order__ = 100
+
+import os
+from pathlib import Path
+from typing import Dict, Any, cast
+
+from benedict import benedict
+
+
+import abx
+
+from .base_configset import BaseConfigSet, ConfigKeyStr
+
+
+class ConfigPluginSpec:
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def get_collection_config_path(self) -> Path:
+        return Path(os.getcwd()) / "ArchiveBox.conf"
+
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def get_system_config_path(self) -> Path:
+        return Path('~/.config/abx/abx.conf').expanduser()
+
+
+    @abx.hookspec
+    @abx.hookimpl
+    def get_CONFIG(self) -> Dict[abx.PluginId, BaseConfigSet]:
+        """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
+        return {
+            # override this in your plugin to return your plugin's config, e.g.
+            # 'ytdlp': YtdlpConfig(...),
+        }
+
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def get_CONFIGS(self) -> Dict[abx.PluginId, BaseConfigSet]:
+        """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
+        return abx.as_dict(pm.hook.get_CONFIG())
+
+
+    @abx.hookspec(firstresult=True)
+    @abx.hookimpl
+    def get_FLAT_CONFIG(self) -> Dict[ConfigKeyStr, Any]:
+        """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
+        return benedict({
+            key: value
+            for configset in pm.hook.get_CONFIGS().values()
+            for key, value in benedict(configset).items()
+        })
+
+
+    # TODO: add read_config_file(), write_config_file() hooks
+
+
+PLUGIN_SPEC = ConfigPluginSpec
+
+
+class ExpectedPluginSpec(ConfigPluginSpec):
+    pass
+
+TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
diff --git a/packages/abx-spec-config/abx_spec_config/base_configset.py b/archivebox/vendor/abx-spec-config/abx_spec_config/base_configset.py
similarity index 100%
rename from packages/abx-spec-config/abx_spec_config/base_configset.py
rename to archivebox/vendor/abx-spec-config/abx_spec_config/base_configset.py
diff --git a/packages/abx-spec-config/abx_spec_config/toml_util.py b/archivebox/vendor/abx-spec-config/abx_spec_config/toml_util.py
similarity index 100%
rename from packages/abx-spec-config/abx_spec_config/toml_util.py
rename to archivebox/vendor/abx-spec-config/abx_spec_config/toml_util.py
diff --git a/packages/abx-spec-config/pyproject.toml b/archivebox/vendor/abx-spec-config/pyproject.toml
similarity index 67%
rename from packages/abx-spec-config/pyproject.toml
rename to archivebox/vendor/abx-spec-config/pyproject.toml
index b85f675e..aa2f6eb4 100644
--- a/packages/abx-spec-config/pyproject.toml
+++ b/archivebox/vendor/abx-spec-config/pyproject.toml
@@ -1,6 +1,9 @@
 [project]
 name = "abx-spec-config"
-version = "0.0.1"
+version = "0.1.0" +description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem." +readme = "README.md" +requires-python = ">=3.10" dependencies = [ "abx>=0.1.0", "python-benedict>=0.34.0", diff --git a/packages/abx-spec-pydantic-pkgr/README.md b/archivebox/vendor/abx-spec-django/README.md similarity index 100% rename from packages/abx-spec-pydantic-pkgr/README.md rename to archivebox/vendor/abx-spec-django/README.md diff --git a/archivebox/vendor/abx-spec-django/abx_spec_django.py b/archivebox/vendor/abx-spec-django/abx_spec_django.py new file mode 100644 index 00000000..562dad72 --- /dev/null +++ b/archivebox/vendor/abx-spec-django/abx_spec_django.py @@ -0,0 +1,118 @@ +__order__ = 300 + +import abx +from typing import List, Dict, Any, cast + +########################################################################################### + +class DjangoPluginSpec: + @abx.hookspec + def get_INSTALLED_APPS() -> List[str]: + return ['abx_spec_django'] + + @abx.hookspec + def get_TEMPLATE_DIRS() -> List[str]: + return [] # e.g. ['your_plugin_type/plugin_name/templates'] + + + @abx.hookspec + def get_STATICFILES_DIRS() -> List[str]: + return [] # e.g. ['your_plugin_type/plugin_name/static'] + + # @abx.hookspec + # def register_STATICFILES_DIRS(STATICFILES_DIRS): + # """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position""" + # # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static') + # pass + + + @abx.hookspec + def get_MIDDLEWARES() -> List[str]: + return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware'] + + # @abx.hookspec + # def register_MIDDLEWARE(MIDDLEWARE): + # """Mutate MIDDLEWARE in place to add your middleware in a specific position""" + # # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware') + # pass + + + @abx.hookspec + def get_AUTHENTICATION_BACKENDS() -> List[str]: + return [] # e.g. ['django_auth_ldap.backend.LDAPBackend'] + + # @abx.hookspec + # def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): + # """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position""" + # # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend') + # pass + + @abx.hookspec + def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME) -> Dict[str, Dict[str, Any]]: + return {} # e.g. {'some_queue_name': {'filename': 'some_queue_name.sqlite3', 'store_none': True, 'results': True, ...}} + + # @abx.hookspec + # def register_DJANGO_HUEY(DJANGO_HUEY): + # """Mutate DJANGO_HUEY in place to add your huey queues in a specific position""" + # # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value' + # pass + + + @abx.hookspec + def get_ADMIN_DATA_VIEWS_URLS() -> List[str]: + return [] + + # @abx.hookspec + # def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): + # """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position""" + # # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py') + # pass + + + # @abx.hookspec + # def register_settings(settings): + # """Mutate settings in place to add your settings / modify existing settings""" + # # settings.SOME_KEY = 'some_value' + # pass + + + ########################################################################################### + + @abx.hookspec + def get_urlpatterns() -> List[str]: + return [] # e.g. 
[path('your_plugin_type/plugin_name/url.py', your_view)] + + # @abx.hookspec + # def register_urlpatterns(urlpatterns): + # """Mutate urlpatterns in place to add your urlpatterns in a specific position""" + # # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view)) + # pass + + ########################################################################################### + + + + @abx.hookspec + def register_admin(admin_site) -> None: + """Register django admin views/models with the main django admin site instance""" + # e.g. admin_site.register(your_model, your_admin_class) + pass + + + ########################################################################################### + + + @abx.hookspec + def ready() -> None: + """Called when Django apps app.ready() are triggered""" + # e.g. abx.pm.hook.get_CONFIG().ytdlp.validate() + pass + + +PLUGIN_SPEC = DjangoPluginSpec + +class ExpectedPluginSpec(DjangoPluginSpec): + pass + +TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec] +pm = cast(TypedPluginManager, abx.pm) diff --git a/packages/abx-spec-django/pyproject.toml b/archivebox/vendor/abx-spec-django/pyproject.toml similarity index 100% rename from packages/abx-spec-django/pyproject.toml rename to archivebox/vendor/abx-spec-django/pyproject.toml diff --git a/packages/abx-spec-searchbackend/README.md b/archivebox/vendor/abx-spec-extractor/README.md similarity index 100% rename from packages/abx-spec-searchbackend/README.md rename to archivebox/vendor/abx-spec-extractor/README.md diff --git a/packages/abx-spec-extractor/abx_spec_extractor.py b/archivebox/vendor/abx-spec-extractor/abx_spec_extractor.py similarity index 100% rename from packages/abx-spec-extractor/abx_spec_extractor.py rename to archivebox/vendor/abx-spec-extractor/abx_spec_extractor.py diff --git a/packages/abx-spec-extractor/pyproject.toml b/archivebox/vendor/abx-spec-extractor/pyproject.toml similarity index 100% rename from packages/abx-spec-extractor/pyproject.toml rename to archivebox/vendor/abx-spec-extractor/pyproject.toml diff --git a/packages/abx/README.md b/archivebox/vendor/abx-spec-pydantic-pkgr/README.md similarity index 100% rename from packages/abx/README.md rename to archivebox/vendor/abx-spec-pydantic-pkgr/README.md diff --git a/archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py b/archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py new file mode 100644 index 00000000..b95b3f33 --- /dev/null +++ b/archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py @@ -0,0 +1,114 @@ +__order__ = 200 + +import os + +from typing import Dict, cast +from pathlib import Path + +from pydantic_pkgr import Binary, BinProvider + +import abx + +from abx_spec_config import ConfigPluginSpec + +########################################################################################### + +class PydanticPkgrPluginSpec: + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_LIB_DIR(self) -> Path: + """Get the directory where shared runtime libraries/dependencies should be installed""" + FLAT_CONFIG = pm.hook.get_FLAT_CONFIG() + LIB_DIR = Path(FLAT_CONFIG.get('LIB_DIR', '/usr/local/share/abx')) + return LIB_DIR + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_BIN_DIR(self) -> Path: + """Get the directory where binaries should be symlinked to""" + FLAT_CONFIG = pm.hook.get_FLAT_CONFIG() + LIB_DIR = pm.hook.get_LIB_DIR() + BIN_DIR = Path(FLAT_CONFIG.get('BIN_DIR') or LIB_DIR / 'bin') + return BIN_DIR + + @abx.hookspec + @abx.hookimpl + def 
get_BINPROVIDERS(self) -> Dict[str, BinProvider]: + return { + # to be implemented by plugins, e.g.: + # 'npm': NpmBinProvider(npm_prefix=Path('/usr/local/share/abx/npm')), + } + + @abx.hookspec + @abx.hookimpl + def get_BINARIES(self) -> Dict[str, Binary]: + return { + # to be implemented by plugins, e.g.: + # 'yt-dlp': Binary(name='yt-dlp', binproviders=[npm]), + } + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_BINPROVIDER(self, binprovider_name: str) -> BinProvider: + """Get a specific BinProvider by name""" + return abx.as_dict(pm.hook.get_BINPROVIDERS())[binprovider_name] + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_BINARY(self, bin_name: str) -> Binary: + """Get a specific Binary by name""" + return abx.as_dict(pm.hook.get_BINARIES())[bin_name] + + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def binary_load(self, binary: Binary, **kwargs) -> Binary: + """Load a binary from the filesystem (override to load a binary from a different source, e.g. DB, cache, etc.)""" + loaded_binary = binary.load(**kwargs) + pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) + return loaded_binary + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def binary_install(self, binary: Binary, **kwargs) -> Binary: + """Override to change how a binary is installed (e.g. by downloading from a remote source, etc.)""" + loaded_binary = binary.install(**kwargs) + pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) + return loaded_binary + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def binary_load_or_install(self, binary: Binary, **kwargs) -> Binary: + """Override to change how a binary is loaded or installed (e.g. by downloading from a remote source, etc.)""" + loaded_binary = binary.load_or_install(**kwargs) + pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) + return loaded_binary + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def binary_symlink_to_bin_dir(self, binary: Binary, bin_dir: Path | None=None): + if not (binary.abspath and os.path.isfile(binary.abspath)): + return + + BIN_DIR = pm.hook.get_BIN_DIR() + try: + BIN_DIR.mkdir(parents=True, exist_ok=True) + symlink = BIN_DIR / binary.name + symlink.unlink(missing_ok=True) + symlink.symlink_to(binary.abspath) + symlink.chmod(0o777) # make sure its executable by everyone + except Exception: + # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') + # not actually needed, we can just run without it + pass + + +PLUGIN_SPEC = PydanticPkgrPluginSpec + + +class RequiredSpecsAvailable(ConfigPluginSpec, PydanticPkgrPluginSpec): + pass + +TypedPluginManager = abx.ABXPluginManager[RequiredSpecsAvailable] +pm = cast(TypedPluginManager, abx.pm) diff --git a/packages/abx-spec-pydantic-pkgr/pyproject.toml b/archivebox/vendor/abx-spec-pydantic-pkgr/pyproject.toml similarity index 100% rename from packages/abx-spec-pydantic-pkgr/pyproject.toml rename to archivebox/vendor/abx-spec-pydantic-pkgr/pyproject.toml diff --git a/archivebox/vendor/abx-spec-searchbackend/README.md b/archivebox/vendor/abx-spec-searchbackend/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py b/archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py new file mode 100644 index 00000000..8bc53eb8 --- /dev/null +++ b/archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py @@ -0,0 +1,40 @@ +import abc +from typing import Iterable, List, Dict, cast + +import abx +from abx_spec_config import 
ConfigPluginSpec + + +class BaseSearchBackend(abc.ABC): + name: str + + @staticmethod + @abc.abstractmethod + def index(snapshot_id: str, texts: List[str]): + return + + @staticmethod + @abc.abstractmethod + def flush(snapshot_ids: Iterable[str]): + return + + @staticmethod + @abc.abstractmethod + def search(text: str) -> List[str]: + raise NotImplementedError("search method must be implemented by subclass") + + +class SearchBackendPluginSpec: + @abx.hookspec + @abx.hookimpl + def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]: + return {} + + +class ExpectedPluginSpec(SearchBackendPluginSpec, ConfigPluginSpec): + pass + +PLUGIN_SPEC = SearchBackendPluginSpec + +TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec] +pm = cast(TypedPluginManager, abx.pm) diff --git a/packages/abx-spec-searchbackend/pyproject.toml b/archivebox/vendor/abx-spec-searchbackend/pyproject.toml similarity index 100% rename from packages/abx-spec-searchbackend/pyproject.toml rename to archivebox/vendor/abx-spec-searchbackend/pyproject.toml diff --git a/archivebox/vendor/abx/README.md b/archivebox/vendor/abx/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/vendor/abx/abx.py b/archivebox/vendor/abx/abx.py new file mode 100644 index 00000000..990fe8e1 --- /dev/null +++ b/archivebox/vendor/abx/abx.py @@ -0,0 +1,483 @@ +__package__ = 'abx' +__id__ = 'abx' +__label__ = 'ABX' +__author__ = 'Nick Sweeting' +__homepage__ = 'https://github.com/ArchiveBox' +__order__ = 0 + + +import inspect +import importlib +import itertools +from pathlib import Path +from typing import Dict, Callable, List, Set, Tuple, Iterable, Any, TypeVar, TypedDict, Type, cast, Generic, Mapping, overload, Final, ParamSpec, Literal, Protocol +from types import ModuleType +from typing_extensions import Annotated +from functools import cache + +from benedict import benedict +from pydantic import AfterValidator + +from pluggy import HookimplMarker, PluginManager, HookimplOpts, HookspecOpts, HookCaller + + + +ParamsT = ParamSpec("ParamsT") +ReturnT = TypeVar('ReturnT') + +class HookSpecDecoratorThatReturnsFirstResult(Protocol): + def __call__(self, func: Callable[ParamsT, ReturnT]) -> Callable[ParamsT, ReturnT]: ... + +class HookSpecDecoratorThatReturnsListResults(Protocol): + def __call__(self, func: Callable[ParamsT, ReturnT]) -> Callable[ParamsT, List[ReturnT]]: ... + + +class TypedHookspecMarker: + """ + Improved version of pluggy.HookspecMarker that supports type inference of hookspecs with firstresult=True|False correctly + https://github.com/pytest-dev/pluggy/issues/191 + """ + + __slots__ = ('project_name',) + + def __init__(self, project_name: str) -> None: + self.project_name: Final[str] = project_name + + # handle @hookspec(firstresult=False) -> List[ReturnT] (test_firstresult_False_hookspec) + @overload + def __call__( + self, + function: None = ..., + firstresult: Literal[False] = ..., + historic: bool = ..., + warn_on_impl: Warning | None = ..., + warn_on_impl_args: Mapping[str, Warning] | None = ..., + ) -> HookSpecDecoratorThatReturnsListResults: ... + + # handle @hookspec(firstresult=True) -> ReturnT (test_firstresult_True_hookspec) + @overload + def __call__( + self, + function: None = ..., + firstresult: Literal[True] = ..., + historic: bool = ..., + warn_on_impl: Warning | None = ..., + warn_on_impl_args: Mapping[str, Warning] | None = ..., + ) -> HookSpecDecoratorThatReturnsFirstResult: ... + + # handle @hookspec -> List[ReturnT] (test_normal_hookspec) + # order matters!!! 
this one has to come last + @overload + def __call__( + self, + function: Callable[ParamsT, ReturnT] = ..., + firstresult: Literal[False] = ..., + historic: bool = ..., + warn_on_impl: None = ..., + warn_on_impl_args: None = ..., + ) -> Callable[ParamsT, List[ReturnT]]: ... + + def __call__( + self, + function: Callable[ParamsT, ReturnT] | None = None, + firstresult: bool = False, + historic: bool = False, + warn_on_impl: Warning | None = None, + warn_on_impl_args: Mapping[str, Warning] | None = None, + ) -> Callable[ParamsT, List[ReturnT]] | HookSpecDecoratorThatReturnsListResults | HookSpecDecoratorThatReturnsFirstResult: + + def setattr_hookspec_opts(func) -> Callable: + if historic and firstresult: + raise ValueError("cannot have a historic firstresult hook") + opts: HookspecOpts = { + "firstresult": firstresult, + "historic": historic, + "warn_on_impl": warn_on_impl, + "warn_on_impl_args": warn_on_impl_args, + } + setattr(func, self.project_name + "_spec", opts) + return func + + if function is not None: + return setattr_hookspec_opts(function) + else: + return setattr_hookspec_opts + + + + +spec = hookspec = TypedHookspecMarker("abx") +impl = hookimpl = HookimplMarker("abx") + + +def is_valid_attr_name(x: str) -> str: + assert x.isidentifier() and not x.startswith('_') + return x + +def is_valid_module_name(x: str) -> str: + assert x.isidentifier() and not x.startswith('_') and x.islower() + return x + +AttrName = Annotated[str, AfterValidator(is_valid_attr_name)] +PluginId = Annotated[str, AfterValidator(is_valid_module_name)] + + +class PluginInfo(TypedDict, total=True): + id: PluginId + package: AttrName + label: str + version: str + author: str + homepage: str + dependencies: List[str] + + source_code: str + hooks: Dict[AttrName, Callable] + module: ModuleType + + + +PluginSpec = TypeVar("PluginSpec") + +class ABXPluginManager(PluginManager, Generic[PluginSpec]): + """ + Patch to fix pluggy's PluginManager to work with pydantic models. 
+    See: https://github.com/pytest-dev/pluggy/pull/536
+    """
+
+    # enable static type checking of pm.hook.call() calls
+    # https://stackoverflow.com/a/62871889/2156113
+    # https://github.com/pytest-dev/pluggy/issues/191
+    hook: PluginSpec
+
+    def create_typed_hookcaller(self, name: str, module_or_class: Type[PluginSpec], spec_opts: HookspecOpts) -> HookCaller:
+        """
+        create a new HookCaller subclass with a modified __signature__
+        so that the return type is correct and args are converted to kwargs
+        """
+        TypedHookCaller = type('TypedHookCaller', (HookCaller,), {})
+
+        hookspec_signature = inspect.signature(getattr(module_or_class, name))
+        hookspec_return_type = hookspec_signature.return_annotation
+
+        # replace return type with list if firstresult=False
+        hookcall_return_type = hookspec_return_type if spec_opts['firstresult'] else List[hookspec_return_type]
+
+        # replace each arg with kwarg equivalent (pm.hook.call() only accepts kwargs)
+        args_as_kwargs = [
+            param.replace(kind=inspect.Parameter.KEYWORD_ONLY) if param.name != 'self' else param
+            for param in hookspec_signature.parameters.values()
+        ]
+        TypedHookCaller.__signature__ = hookspec_signature.replace(parameters=args_as_kwargs, return_annotation=hookcall_return_type)
+        TypedHookCaller.__name__ = f'{name}_HookCaller'
+
+        return TypedHookCaller(name, self._hookexec, module_or_class, spec_opts)
+
+    def add_hookspecs(self, module_or_class: Type[PluginSpec]) -> None:
+        """Add HookSpecs from the given class, (generic type allows us to enforce types of pm.hook.call() statically)"""
+        names = []
+        for name in dir(module_or_class):
+            spec_opts = self.parse_hookspec_opts(module_or_class, name)
+            if spec_opts is not None:
+                hc: HookCaller | None = getattr(self.hook, name, None)
+                if hc is None:
+                    hc = self.create_typed_hookcaller(name, module_or_class, spec_opts)
+                    setattr(self.hook, name, hc)
+                else:
+                    # Plugins registered this hook without knowing the spec.
+ hc.set_specification(module_or_class, spec_opts) + for hookfunction in hc.get_hookimpls(): + self._verify_hook(hc, hookfunction) + names.append(name) + + if not names: + raise ValueError( + f"did not find any {self.project_name!r} hooks in {module_or_class!r}" + ) + + def parse_hookimpl_opts(self, plugin, name: str) -> HookimplOpts | None: + # IMPORTANT: @property methods can have side effects, and are never hookimpl + # if attr is a property, skip it in advance + # plugin_class = plugin if inspect.isclass(plugin) else type(plugin) + if isinstance(getattr(plugin, name, None), property): + return None + + try: + return super().parse_hookimpl_opts(plugin, name) + except AttributeError: + return None + + +pm = ABXPluginManager("abx") + + + +def get_plugin_order(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int, Path]: + assert plugin + plugin_module = None + plugin_dir = None + + if isinstance(plugin, str) or isinstance(plugin, Path): + if str(plugin).endswith('.py'): + plugin_dir = Path(plugin).parent + elif '/' in str(plugin): + # assume it's a path to a plugin directory + plugin_dir = Path(plugin) + elif str(plugin).isidentifier(): + pass + + elif inspect.ismodule(plugin): + plugin_module = plugin + plugin_dir = Path(str(plugin_module.__file__)).parent + elif inspect.isclass(plugin): + plugin_module = plugin + plugin_dir = Path(inspect.getfile(plugin)).parent + else: + raise ValueError(f'Invalid plugin, cannot get order: {plugin}') + + if plugin_dir: + try: + # if .plugin_order file exists, use it to set the load priority + order = int((plugin_dir / '.plugin_order').read_text()) + assert -1000000 < order < 100000000 + return (order, plugin_dir) + except FileNotFoundError: + pass + + if plugin_module: + order = getattr(plugin_module, '__order__', 999) + else: + order = 999 + + assert order is not None + assert plugin_dir + + return (order, plugin_dir) + + +# @cache +def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo: + assert plugin + + # import the plugin module by its name + if isinstance(plugin, str): + module = importlib.import_module(plugin) + plugin = getattr(module, 'PLUGIN_SPEC', getattr(module, 'PLUGIN', module)) + elif inspect.ismodule(plugin): + module = plugin + plugin = getattr(module, 'PLUGIN_SPEC', getattr(module, 'PLUGIN', module)) + elif inspect.isclass(plugin): + module = inspect.getmodule(plugin) + else: + raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}') + + assert module + + plugin_file = Path(inspect.getfile(module)) + plugin_package = module.__package__ or module.__name__ + plugin_id = plugin_package.replace('.', '_') + + # load the plugin info from the plugin/__init__.py __attr__s if they exist + plugin_module_attrs = { + 'label': getattr(module, '__label__', plugin_id), + 'version': getattr(module, '__version__', '0.0.1'), + 'author': getattr(module, '__author__', 'ArchiveBox'), + 'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox'), + 'dependencies': getattr(module, '__dependencies__', []), + } + + # load the plugin info from the plugin/pyproject.toml file if it has one + plugin_toml_info = {} + try: + # try loading ./pyproject.toml first in case the plugin is a bare python file not inside a package dir + plugin_toml_info = benedict.from_toml((plugin_file.parent / 'pyproject.toml').read_text()).project + except Exception: + try: + # try loading ../pyproject.toml next in case the plugin is in a packge dir + plugin_toml_info = 
benedict.from_toml((plugin_file.parent.parent / 'pyproject.toml').read_text()).project + except Exception: + # print('WARNING: could not detect pyproject.toml for PLUGIN:', plugin_id, plugin_file.parent, 'ERROR:', e) + pass + + + assert plugin_id + assert plugin_package + assert module.__file__ + + # merge the plugin info from all sources + add dyanmically calculated info + return cast(PluginInfo, benedict(PluginInfo(**{ + 'id': plugin_id, + **plugin_module_attrs, + **plugin_toml_info, + 'package': plugin_package, + 'source_code': module.__file__, + 'order': get_plugin_order(plugin), + 'hooks': get_plugin_hooks(plugin), + 'module': module, + 'plugin': plugin, + }))) + + +def get_all_plugins() -> Dict[PluginId, PluginInfo]: + """Get the metadata for all the plugins registered with Pluggy.""" + plugins = {} + for plugin_module in pm.get_plugins(): + plugin_info = get_plugin(plugin=plugin_module) + assert 'id' in plugin_info + plugins[plugin_info['id']] = plugin_info + return benedict(plugins) + + +def get_all_hook_names() -> Set[str]: + """Get a set of all hook names across all plugins""" + return { + hook_name + for plugin_module in pm.get_plugins() + for hook_name in get_plugin_hooks(plugin_module) + } + + +def get_all_hook_specs() -> Dict[str, Dict[str, Any]]: + """Get a set of all hookspec methods defined in all plugins (useful for type checking if a pm.hook.call() is valid)""" + hook_specs = {} + + for hook_name in get_all_hook_names(): + for plugin_module in pm.get_plugins(): + if hasattr(plugin_module, hook_name): + hookspecopts = pm.parse_hookspec_opts(plugin_module, hook_name) + if hookspecopts: + method = getattr(plugin_module, hook_name) + signature = inspect.signature(method) + return_type = signature.return_annotation if signature.return_annotation != inspect._empty else None + + if hookspecopts.get('firstresult'): + return_type = return_type + else: + # if not firstresult, return_type is a sequence + return_type = List[return_type] + + call_signature = signature.replace(return_annotation=return_type) + method = lambda *args, **kwargs: getattr(pm.hook, hook_name)(*args, **kwargs) + method.__signature__ = call_signature + method.__name__ = hook_name + method.__package__ = plugin_module.__package__ + + hook_specs[hook_name] = { + 'name': hook_name, + 'method': method, + 'signature': call_signature, + 'hookspec_opts': hookspecopts, + 'hookspec_signature': signature, + 'hookspec_plugin': plugin_module.__package__, + } + return hook_specs + + + +###### PLUGIN DISCOVERY AND LOADING ######################################################## + + +def find_plugins_in_dir(plugins_dir: Path) -> Dict[PluginId, Path]: + """ + Find all the plugins in a given directory. Just looks for an __init__.py file. 
+ """ + python_dirs = plugins_dir.glob("*/__init__.py") + sorted_python_dirs = sorted(python_dirs, key=lambda p: get_plugin_order(plugin=p) or 500) + + return { + plugin_entrypoint.parent.name: plugin_entrypoint.parent + for plugin_entrypoint in sorted_python_dirs + if plugin_entrypoint.parent.name not in ('abx', 'core') + } + + +def get_pip_installed_plugins(group: PluginId='abx') -> Dict[PluginId, Path]: + """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip""" + import importlib.metadata + + DETECTED_PLUGINS = {} # module_name: module_dir_path + for dist in list(importlib.metadata.distributions()): + for entrypoint in dist.entry_points: + if entrypoint.group != group or pm.is_blocked(entrypoint.name): + continue + DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent + # pm.register(plugin, name=ep.name) + # pm._plugin_distinfo.append((plugin, DistFacade(dist))) + return DETECTED_PLUGINS + + + +# Load all plugins from pip packages, archivebox built-ins, and user plugins +def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId, Path]): + """ + Load all the plugins from a dictionary of module names and directory paths. + """ + PLUGINS_TO_LOAD = [] + LOADED_PLUGINS = {} + + for plugin in plugins: + plugin_info = get_plugin(plugin) + assert plugin_info, f'No plugin metadata found for {plugin}' + assert 'id' in plugin_info and 'module' in plugin_info + if plugin_info['module'] in pm.get_plugins(): + LOADED_PLUGINS[plugin_info['id']] = plugin_info + continue + else: + PLUGINS_TO_LOAD.append(plugin_info) + + PLUGINS_TO_LOAD = sorted(PLUGINS_TO_LOAD, key=lambda x: x['order']) + + for plugin_info in PLUGINS_TO_LOAD: + pm.register(plugin_info['module']) + LOADED_PLUGINS[plugin_info['id']] = plugin_info + # print(f' √ Loaded plugin: {plugin_id}') + return benedict(LOADED_PLUGINS) + +@cache +def get_plugin_hooks(plugin: PluginId | ModuleType | Type | None) -> Dict[AttrName, Callable]: + """Get all the functions marked with @hookimpl on a module.""" + if not plugin: + return {} + + hooks = {} + + if isinstance(plugin, str): + plugin_module = importlib.import_module(plugin) + elif inspect.ismodule(plugin) or inspect.isclass(plugin): + plugin_module = plugin + else: + raise ValueError(f'Invalid plugin, cannot get hooks: {plugin}') + + for attr_name in dir(plugin_module): + if attr_name.startswith('_'): + continue + try: + attr = getattr(plugin_module, attr_name) + if isinstance(attr, Callable): + if pm.parse_hookimpl_opts(plugin_module, attr_name): + hooks[attr_name] = attr + except Exception as e: + print(f'Error getting hookimpls for {plugin}: {e}') + + return hooks + +ReturnT = TypeVar('ReturnT') + +def as_list(results: List[List[ReturnT]]) -> List[ReturnT]: + """Flatten a list of lists returned by a pm.hook.call() into a single list""" + return list(itertools.chain(*results)) + + +def as_dict(results: List[Dict[PluginId, ReturnT]]) -> Dict[PluginId, ReturnT]: + """Flatten a list of dicts returned by a pm.hook.call() into a single dict""" + + if isinstance(results, (dict, benedict)): + results_list = results.values() + else: + results_list = results + + return benedict({ + result_id: result + for plugin_results in results_list + for result_id, result in plugin_results.items() + }) diff --git a/packages/abx/pyproject.toml b/archivebox/vendor/abx/pyproject.toml similarity index 100% rename from packages/abx/pyproject.toml rename to archivebox/vendor/abx/pyproject.toml diff --git a/archivebox/vendor/pocket 
b/archivebox/vendor/pocket new file mode 160000 index 00000000..e7970b63 --- /dev/null +++ b/archivebox/vendor/pocket @@ -0,0 +1 @@ +Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5 diff --git a/packages/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr similarity index 100% rename from packages/pydantic-pkgr rename to archivebox/vendor/pydantic-pkgr diff --git a/packages/abx-plugin-archivedotorg-extractor/__init__.py b/packages/abx-plugin-archivedotorg-extractor/__init__.py deleted file mode 100644 index a5c24932..00000000 --- a/packages/abx-plugin-archivedotorg-extractor/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -__package__ = 'plugins_extractor.archivedotorg' -__label__ = 'archivedotorg' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://archive.org' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'archivedotorg': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import ARCHIVEDOTORG_CONFIG - - return { - 'archivedotorg': ARCHIVEDOTORG_CONFIG - } - - -# @abx.hookimpl -# def get_EXTRACTORS(): -# from .extractors import ARCHIVEDOTORG_EXTRACTOR -# -# return { -# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR, -# } diff --git a/packages/abx-plugin-archivedotorg-extractor/pyproject.toml b/packages/abx-plugin-archivedotorg-extractor/pyproject.toml deleted file mode 100644 index 8754b4bd..00000000 --- a/packages/abx-plugin-archivedotorg-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-archivedotorg-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-chrome-extractor/__init__.py b/packages/abx-plugin-chrome-extractor/__init__.py deleted file mode 100644 index 016cd292..00000000 --- a/packages/abx-plugin-chrome-extractor/__init__.py +++ /dev/null @@ -1,54 +0,0 @@ -__package__ = 'plugins_extractor.chrome' -__id__ = 'chrome' -__label__ = 'Chrome' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/main/archivebox/plugins_extractor/chrome' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import CHROME_CONFIG - - return { - __id__: CHROME_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import CHROME_BINARY - - return { - 'chrome': CHROME_BINARY, - } - -@abx.hookimpl -def ready(): - from .config import CHROME_CONFIG - CHROME_CONFIG.validate() - - -# @abx.hookimpl -# def get_EXTRACTORS(): -# return { -# 'pdf': PDF_EXTRACTOR, -# 'screenshot': SCREENSHOT_EXTRACTOR, -# 'dom': DOM_EXTRACTOR, -# } diff --git a/packages/abx-plugin-chrome-extractor/pyproject.toml b/packages/abx-plugin-chrome-extractor/pyproject.toml deleted file mode 100644 index 6676882c..00000000 --- a/packages/abx-plugin-chrome-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-chrome-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git 
a/packages/abx-plugin-curl-extractor/__init__.py b/packages/abx-plugin-curl-extractor/__init__.py deleted file mode 100644 index 99af0107..00000000 --- a/packages/abx-plugin-curl-extractor/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -__package__ = 'plugins_extractor.curl' -__label__ = 'curl' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/curl/curl' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'curl': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import CURL_CONFIG - - return { - 'curl': CURL_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import CURL_BINARY - - return { - 'curl': CURL_BINARY, - } diff --git a/packages/abx-plugin-curl-extractor/pyproject.toml b/packages/abx-plugin-curl-extractor/pyproject.toml deleted file mode 100644 index 9bd6f396..00000000 --- a/packages/abx-plugin-curl-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-curl-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-favicon-extractor/__init__.py b/packages/abx-plugin-favicon-extractor/__init__.py deleted file mode 100644 index 3fa84560..00000000 --- a/packages/abx-plugin-favicon-extractor/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -__package__ = 'plugins_extractor.favicon' -__label__ = 'favicon' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/archivebox' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'favicon': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import FAVICON_CONFIG - - return { - 'favicon': FAVICON_CONFIG - } - - -# @abx.hookimpl -# def get_EXTRACTORS(): -# from .extractors import FAVICON_EXTRACTOR - -# return { -# 'favicon': FAVICON_EXTRACTOR, -# } diff --git a/packages/abx-plugin-favicon-extractor/pyproject.toml b/packages/abx-plugin-favicon-extractor/pyproject.toml deleted file mode 100644 index 96e62f6d..00000000 --- a/packages/abx-plugin-favicon-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-favicon-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-git-extractor/__init__.py b/packages/abx-plugin-git-extractor/__init__.py deleted file mode 100644 index db18919f..00000000 --- a/packages/abx-plugin-git-extractor/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -__package__ = 'plugins_extractor.git' -__label__ = 'git' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/git/git' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'git': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import GIT_CONFIG - - return { - 'git': GIT_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import GIT_BINARY - - 
return { - 'git': GIT_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import GIT_EXTRACTOR - - return { - 'git': GIT_EXTRACTOR, - } diff --git a/packages/abx-plugin-git-extractor/extractors.py b/packages/abx-plugin-git-extractor/extractors.py deleted file mode 100644 index 350f1b82..00000000 --- a/packages/abx-plugin-git-extractor/extractors.py +++ /dev/null @@ -1,17 +0,0 @@ -__package__ = 'plugins_extractor.git' - -from pathlib import Path - -from abx.archivebox.base_extractor import BaseExtractor, ExtractorName - -from .binaries import GIT_BINARY - - -class GitExtractor(BaseExtractor): - name: ExtractorName = 'git' - binary: str = GIT_BINARY.name - - def get_output_path(self, snapshot) -> Path | None: - return snapshot.as_link() / 'git' - -GIT_EXTRACTOR = GitExtractor() diff --git a/packages/abx-plugin-git-extractor/pyproject.toml b/packages/abx-plugin-git-extractor/pyproject.toml deleted file mode 100644 index 4a7b375e..00000000 --- a/packages/abx-plugin-git-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-git-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-htmltotext-extractor/__init__.py b/packages/abx-plugin-htmltotext-extractor/__init__.py deleted file mode 100644 index 0f2b756c..00000000 --- a/packages/abx-plugin-htmltotext-extractor/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -__package__ = 'plugins_extractor.htmltotext' -__id__ = 'htmltotext' -__label__ = 'HTML-to-Text' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/archivebox' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import HTMLTOTEXT_CONFIG - - return { - __id__: HTMLTOTEXT_CONFIG - } - - -# @abx.hookimpl -# def get_EXTRACTORS(): -# from .extractors import FAVICON_EXTRACTOR - -# return { -# 'htmltotext': FAVICON_EXTRACTOR, -# } diff --git a/packages/abx-plugin-htmltotext-extractor/pyproject.toml b/packages/abx-plugin-htmltotext-extractor/pyproject.toml deleted file mode 100644 index 2e26cb25..00000000 --- a/packages/abx-plugin-htmltotext-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-htmltotext-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-ldap-auth/pyproject.toml b/packages/abx-plugin-ldap-auth/pyproject.toml deleted file mode 100644 index 1db98ebd..00000000 --- a/packages/abx-plugin-ldap-auth/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -[project] -name = "abx-ldap-auth" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] - - -[project.entry-points.abx] -ldap = "abx_ldap_auth" - - -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[tool.hatch.build.targets.sdist] -packages = ["."] - -[tool.hatch.build.targets.wheel] -packages = ["."] diff --git a/packages/abx-plugin-mercury-extractor/__init__.py b/packages/abx-plugin-mercury-extractor/__init__.py deleted file mode 100644 index 10aca671..00000000 --- 
a/packages/abx-plugin-mercury-extractor/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -__package__ = 'plugins_extractor.mercury' -__label__ = 'mercury' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/postlight/mercury-parser' -__dependencies__ = ['npm'] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'mercury': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import MERCURY_CONFIG - - return { - 'mercury': MERCURY_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import MERCURY_BINARY - - return { - 'mercury': MERCURY_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import MERCURY_EXTRACTOR - - return { - 'mercury': MERCURY_EXTRACTOR, - } diff --git a/packages/abx-plugin-mercury-extractor/extractors.py b/packages/abx-plugin-mercury-extractor/extractors.py deleted file mode 100644 index 5d91b0e0..00000000 --- a/packages/abx-plugin-mercury-extractor/extractors.py +++ /dev/null @@ -1,19 +0,0 @@ -__package__ = 'plugins_extractor.mercury' - -from pathlib import Path - -from abx.archivebox.base_extractor import BaseExtractor, ExtractorName - -from .binaries import MERCURY_BINARY - - - -class MercuryExtractor(BaseExtractor): - name: ExtractorName = 'mercury' - binary: str = MERCURY_BINARY.name - - def get_output_path(self, snapshot) -> Path | None: - return snapshot.link_dir / 'mercury' / 'content.html' - - -MERCURY_EXTRACTOR = MercuryExtractor() diff --git a/packages/abx-plugin-mercury-extractor/pyproject.toml b/packages/abx-plugin-mercury-extractor/pyproject.toml deleted file mode 100644 index 35415a1d..00000000 --- a/packages/abx-plugin-mercury-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-mercury-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-pocket-extractor/__init__.py b/packages/abx-plugin-pocket-extractor/__init__.py deleted file mode 100644 index bf09435f..00000000 --- a/packages/abx-plugin-pocket-extractor/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -__package__ = 'plugins_extractor.pocket' -__id__ = 'pocket' -__label__ = 'pocket' -__version__ = '2024.10.21' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/pocket' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import POCKET_CONFIG - - return { - __id__: POCKET_CONFIG - } - -@abx.hookimpl -def ready(): - from .config import POCKET_CONFIG - POCKET_CONFIG.validate() diff --git a/packages/abx-plugin-pocket-extractor/pyproject.toml b/packages/abx-plugin-pocket-extractor/pyproject.toml deleted file mode 100644 index c9af2450..00000000 --- a/packages/abx-plugin-pocket-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-pocket-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-puppeteer-binprovider/__init__.py 
b/packages/abx-plugin-puppeteer-binprovider/__init__.py deleted file mode 100644 index 7acc5b1b..00000000 --- a/packages/abx-plugin-puppeteer-binprovider/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -__package__ = 'plugins_pkg.puppeteer' -__label__ = 'puppeteer' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/puppeteer/puppeteer' -__dependencies__ = ['npm'] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'puppeteer': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import PUPPETEER_CONFIG - - return { - 'puppeteer': PUPPETEER_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import PUPPETEER_BINARY - - return { - 'puppeteer': PUPPETEER_BINARY, - } - -@abx.hookimpl -def get_BINPROVIDERS(): - from .binproviders import PUPPETEER_BINPROVIDER - - return { - 'puppeteer': PUPPETEER_BINPROVIDER, - } diff --git a/packages/abx-plugin-puppeteer-binprovider/pyproject.toml b/packages/abx-plugin-puppeteer-binprovider/pyproject.toml deleted file mode 100644 index e901ca88..00000000 --- a/packages/abx-plugin-puppeteer-binprovider/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-puppeteer-binprovider" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-readability-extractor/__init__.py b/packages/abx-plugin-readability-extractor/__init__.py deleted file mode 100644 index 2ef1a1a8..00000000 --- a/packages/abx-plugin-readability-extractor/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -__package__ = 'plugins_extractor.readability' -__label__ = 'readability' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/readability-extractor' -__dependencies__ = ['npm'] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'readability': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import READABILITY_CONFIG - - return { - 'readability': READABILITY_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import READABILITY_BINARY - - return { - 'readability': READABILITY_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import READABILITY_EXTRACTOR - - return { - 'readability': READABILITY_EXTRACTOR, - } diff --git a/packages/abx-plugin-readability-extractor/extractors.py b/packages/abx-plugin-readability-extractor/extractors.py deleted file mode 100644 index eb8ea165..00000000 --- a/packages/abx-plugin-readability-extractor/extractors.py +++ /dev/null @@ -1,20 +0,0 @@ -__package__ = 'plugins_extractor.readability' - -from pathlib import Path - -from pydantic_pkgr import BinName - -from abx.archivebox.base_extractor import BaseExtractor - -from .binaries import READABILITY_BINARY - - -class ReadabilityExtractor(BaseExtractor): - name: str = 'readability' - binary: BinName = READABILITY_BINARY.name - - def get_output_path(self, snapshot) -> Path: - return Path(snapshot.link_dir) / 'readability' / 'content.html' - - -READABILITY_EXTRACTOR = ReadabilityExtractor() diff --git a/packages/abx-plugin-readability-extractor/pyproject.toml b/packages/abx-plugin-readability-extractor/pyproject.toml 
deleted file mode 100644 index 5caa0adb..00000000 --- a/packages/abx-plugin-readability-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-readability-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-readwise-extractor/__init__.py b/packages/abx-plugin-readwise-extractor/__init__.py deleted file mode 100644 index 002eb58b..00000000 --- a/packages/abx-plugin-readwise-extractor/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -__package__ = 'plugins_extractor.readwise' -__id__ = 'readwise' -__label__ = 'readwise' -__version__ = '2024.10.21' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import READWISE_CONFIG - - return { - __id__: READWISE_CONFIG - } - -@abx.hookimpl -def ready(): - from .config import READWISE_CONFIG - READWISE_CONFIG.validate() diff --git a/packages/abx-plugin-readwise-extractor/config.py b/packages/abx-plugin-readwise-extractor/config.py deleted file mode 100644 index 106aaf06..00000000 --- a/packages/abx-plugin-readwise-extractor/config.py +++ /dev/null @@ -1,17 +0,0 @@ -__package__ = 'plugins_extractor.readwise' - -from typing import Dict -from pathlib import Path - -from pydantic import Field - -from abx.archivebox.base_configset import BaseConfigSet - -from archivebox.config import CONSTANTS - - -class ReadwiseConfig(BaseConfigSet): - READWISE_DB_PATH: Path = Field(default=CONSTANTS.SOURCES_DIR / "readwise_reader_api.db") - READWISE_READER_TOKENS: Dict[str, str] = Field(default=lambda: {}) # {: , ...} - -READWISE_CONFIG = ReadwiseConfig() diff --git a/packages/abx-plugin-readwise-extractor/pyproject.toml b/packages/abx-plugin-readwise-extractor/pyproject.toml deleted file mode 100644 index 7df49b56..00000000 --- a/packages/abx-plugin-readwise-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-readwise-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-ripgrep-search/__init__.py b/packages/abx-plugin-ripgrep-search/__init__.py deleted file mode 100644 index ac1e417c..00000000 --- a/packages/abx-plugin-ripgrep-search/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -__package__ = 'plugins_search.ripgrep' -__label__ = 'ripgrep' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/BurntSushi/ripgrep' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'ripgrep': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import RIPGREP_CONFIG - - return { - 'ripgrep': RIPGREP_CONFIG - } - - -@abx.hookimpl -def get_BINARIES(): - from .binaries import RIPGREP_BINARY - - return { - 'ripgrep': RIPGREP_BINARY - } - - -@abx.hookimpl -def get_SEARCHBACKENDS(): - from .searchbackend import RIPGREP_SEARCH_BACKEND - - return { - 'ripgrep': 
RIPGREP_SEARCH_BACKEND, - } diff --git a/packages/abx-plugin-ripgrep-search/pyproject.toml b/packages/abx-plugin-ripgrep-search/pyproject.toml deleted file mode 100644 index c79821d1..00000000 --- a/packages/abx-plugin-ripgrep-search/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-ripgrep-search" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-singlefile-extractor/extractors.py b/packages/abx-plugin-singlefile-extractor/extractors.py deleted file mode 100644 index fedbe801..00000000 --- a/packages/abx-plugin-singlefile-extractor/extractors.py +++ /dev/null @@ -1,19 +0,0 @@ -__package__ = 'plugins_extractor.singlefile' - -from pathlib import Path - -from pydantic_pkgr import BinName -from abx.archivebox.base_extractor import BaseExtractor - -from .binaries import SINGLEFILE_BINARY - - -class SinglefileExtractor(BaseExtractor): - name: str = 'singlefile' - binary: BinName = SINGLEFILE_BINARY.name - - def get_output_path(self, snapshot) -> Path: - return Path(snapshot.link_dir) / 'singlefile.html' - - -SINGLEFILE_EXTRACTOR = SinglefileExtractor() diff --git a/packages/abx-plugin-singlefile-extractor/pyproject.toml b/packages/abx-plugin-singlefile-extractor/pyproject.toml deleted file mode 100644 index b0c9df1b..00000000 --- a/packages/abx-plugin-singlefile-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-singlefile-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-sonic-search/pyproject.toml b/packages/abx-plugin-sonic-search/pyproject.toml deleted file mode 100644 index a61d17c7..00000000 --- a/packages/abx-plugin-sonic-search/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-sonic-search" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-sqlitefts-search/__init__.py b/packages/abx-plugin-sqlitefts-search/__init__.py deleted file mode 100644 index 63fb1b12..00000000 --- a/packages/abx-plugin-sqlitefts-search/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -__package__ = 'plugins_search.sqlitefts' -__label__ = 'sqlitefts' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/archivebox' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'sqlitefts': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import SQLITEFTS_CONFIG - - return { - 'sqlitefts': SQLITEFTS_CONFIG - } - - -@abx.hookimpl -def get_SEARCHBACKENDS(): - from .searchbackend import SQLITEFTS_SEARCH_BACKEND - - return { - 'sqlitefts': SQLITEFTS_SEARCH_BACKEND, - } diff --git a/packages/abx-plugin-sqlitefts-search/pyproject.toml b/packages/abx-plugin-sqlitefts-search/pyproject.toml deleted file mode 100644 index f635fb16..00000000 --- a/packages/abx-plugin-sqlitefts-search/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-sqlitefts-search" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-wget-extractor/__init__.py 
b/packages/abx-plugin-wget-extractor/__init__.py deleted file mode 100644 index e2a36aa4..00000000 --- a/packages/abx-plugin-wget-extractor/__init__.py +++ /dev/null @@ -1,54 +0,0 @@ -__package__ = 'plugins_extractor.wget' -__id__ = 'wget' -__label__ = 'WGET' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/wget' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import WGET_CONFIG - - return { - __id__: WGET_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import WGET_BINARY - - return { - 'wget': WGET_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import WGET_EXTRACTOR, WARC_EXTRACTOR - - return { - 'wget': WGET_EXTRACTOR, - 'warc': WARC_EXTRACTOR, - } - -@abx.hookimpl -def ready(): - from .config import WGET_CONFIG - WGET_CONFIG.validate() diff --git a/packages/abx-plugin-wget-extractor/extractors.py b/packages/abx-plugin-wget-extractor/extractors.py deleted file mode 100644 index 86fa3923..00000000 --- a/packages/abx-plugin-wget-extractor/extractors.py +++ /dev/null @@ -1,37 +0,0 @@ -__package__ = 'plugins_extractor.wget' - -from pathlib import Path - -from pydantic_pkgr import BinName - -from abx.archivebox.base_extractor import BaseExtractor, ExtractorName - -from .binaries import WGET_BINARY -from .wget_util import wget_output_path - -class WgetExtractor(BaseExtractor): - name: ExtractorName = 'wget' - binary: BinName = WGET_BINARY.name - - def get_output_path(self, snapshot) -> Path | None: - wget_index_path = wget_output_path(snapshot.as_link()) - if wget_index_path: - return Path(wget_index_path) - return None - -WGET_EXTRACTOR = WgetExtractor() - - -class WarcExtractor(BaseExtractor): - name: ExtractorName = 'warc' - binary: BinName = WGET_BINARY.name - - def get_output_path(self, snapshot) -> Path | None: - warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz')) - if warc_files: - return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0] - return None - - -WARC_EXTRACTOR = WarcExtractor() - diff --git a/packages/abx-plugin-wget-extractor/pyproject.toml b/packages/abx-plugin-wget-extractor/pyproject.toml deleted file mode 100644 index 21445c18..00000000 --- a/packages/abx-plugin-wget-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-wget-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-plugin-ytdlp-extractor/pyproject.toml b/packages/abx-plugin-ytdlp-extractor/pyproject.toml deleted file mode 100644 index 1b6b4e30..00000000 --- a/packages/abx-plugin-ytdlp-extractor/pyproject.toml +++ /dev/null @@ -1,7 +0,0 @@ -[project] -name = "abx-ytdlp-extractor" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [] diff --git a/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py b/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py deleted file mode 100644 index 5b646bf9..00000000 --- a/packages/abx-spec-archivebox/abx_spec_archivebox/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -__package__ = 
'abx_spec_archivebox' - -# from .effects import * -# from .events import * -# from .reads import * -# from .writes import * -# from .states import * diff --git a/packages/abx-spec-config/abx_spec_config/__init__.py b/packages/abx-spec-config/abx_spec_config/__init__.py deleted file mode 100644 index cc840381..00000000 --- a/packages/abx-spec-config/abx_spec_config/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -from pathlib import Path -from typing import Dict, Any - -from benedict import benedict - - -import abx - -from .base_configset import BaseConfigSet, ConfigKeyStr - - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def get_collection_config_path() -> Path: - return Path(os.getcwd()) / "ArchiveBox.conf" - - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def get_system_config_path() -> Path: - return Path('~/.config/abx/abx.conf').expanduser() - - -@abx.hookspec -@abx.hookimpl -def get_CONFIG() -> Dict[abx.PluginId, BaseConfigSet]: - """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}""" - return {} - - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def get_CONFIGS() -> Dict[abx.PluginId, BaseConfigSet]: - """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}""" - return abx.as_dict(abx.pm.hook.get_CONFIG()) - - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def get_FLAT_CONFIG() -> Dict[ConfigKeyStr, Any]: - """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}""" - return benedict({ - key: value - for configset in get_CONFIGS().values() - for key, value in benedict(configset).items() - }) - - -# TODO: add read_config_file(), write_config_file() hooks diff --git a/packages/abx-spec-django/abx_spec_django/__init__.py b/packages/abx-spec-django/abx_spec_django/__init__.py deleted file mode 100644 index 20f62d2b..00000000 --- a/packages/abx-spec-django/abx_spec_django/__init__.py +++ /dev/null @@ -1,140 +0,0 @@ -import abx - -########################################################################################### - -@abx.hookspec -@abx.hookimpl -def get_INSTALLED_APPS(): - """Return a list of apps to add to INSTALLED_APPS""" - # e.g. ['your_plugin_type.plugin_name'] - return ['abx_spec_django'] - -# @abx.hookspec -# @abx.hookimpl -# def register_INSTALLED_APPS(INSTALLED_APPS): -# """Mutate INSTALLED_APPS in place to add your app in a specific position""" -# # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth') -# # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name') -# pass - - -@abx.hookspec -@abx.hookimpl -def get_TEMPLATE_DIRS(): - return [] # e.g. ['your_plugin_type/plugin_name/templates'] - -# @abx.hookspec -# @abx.hookimpl -# def register_TEMPLATE_DIRS(TEMPLATE_DIRS): -# """Install django settings""" -# # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates') -# pass - - -@abx.hookspec -@abx.hookimpl -def get_STATICFILES_DIRS(): - return [] # e.g. ['your_plugin_type/plugin_name/static'] - -# @abx.hookspec -# @abx.hookimpl -# def register_STATICFILES_DIRS(STATICFILES_DIRS): -# """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position""" -# # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static') -# pass - - -@abx.hookspec -@abx.hookimpl -def get_MIDDLEWARES(): - return [] # e.g. 
['your_plugin_type.plugin_name.middleware.YourMiddleware'] - -# @abx.hookspec -# @abx.hookimpl -# def register_MIDDLEWARE(MIDDLEWARE): -# """Mutate MIDDLEWARE in place to add your middleware in a specific position""" -# # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware') -# pass - - -@abx.hookspec -@abx.hookimpl -def get_AUTHENTICATION_BACKENDS(): - return [] # e.g. ['django_auth_ldap.backend.LDAPBackend'] - -# @abx.hookspec -# @abx.hookimpl -# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): -# """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position""" -# # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend') -# pass - -@abx.hookspec -@abx.hookimpl -def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME): - return {} # e.g. {'some_queue_name': {'filename': 'some_queue_name.sqlite3', 'store_none': True, 'results': True, ...}} - -# @abx.hookspec -# @abx.hookimpl -# def register_DJANGO_HUEY(DJANGO_HUEY): -# """Mutate DJANGO_HUEY in place to add your huey queues in a specific position""" -# # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value' -# pass - - -@abx.hookspec -@abx.hookimpl -def get_ADMIN_DATA_VIEWS_URLS(): - return [] - -# @abx.hookspec -# @abx.hookimpl -# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): -# """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position""" -# # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py') -# pass - - -# @abx.hookspec -# @abx.hookimpl -# def register_settings(settings): -# """Mutate settings in place to add your settings / modify existing settings""" -# # settings.SOME_KEY = 'some_value' -# pass - - -########################################################################################### - -@abx.hookspec -@abx.hookimpl -def get_urlpatterns(): - return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)] - -# @abx.hookspec -# @abx.hookimpl -# def register_urlpatterns(urlpatterns): -# """Mutate urlpatterns in place to add your urlpatterns in a specific position""" -# # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view)) -# pass - -########################################################################################### - - - -@abx.hookspec -@abx.hookimpl -def register_admin(admin_site): - """Register django admin views/models with the main django admin site instance""" - # e.g. admin_site.register(your_model, your_admin_class) - pass - - -########################################################################################### - - -@abx.hookspec -@abx.hookimpl -def ready(): - """Called when Django apps app.ready() are triggered""" - # e.g. 
abx.pm.hook.get_CONFIG().ytdlp.validate() - pass diff --git a/packages/abx-spec-django/abx_spec_django/apps.py b/packages/abx-spec-django/abx_spec_django/apps.py deleted file mode 100644 index 667b74c0..00000000 --- a/packages/abx-spec-django/abx_spec_django/apps.py +++ /dev/null @@ -1,14 +0,0 @@ -__package__ = 'abx_spec_django' - -from django.apps import AppConfig - -import abx - - -class ABXConfig(AppConfig): - name = 'abx_spec_django' - - def ready(self): - from django.conf import settings - - abx.pm.hook.ready(settings=settings) diff --git a/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py b/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py deleted file mode 100644 index 4665452a..00000000 --- a/packages/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py +++ /dev/null @@ -1,72 +0,0 @@ -import os - -from typing import Dict -from pathlib import Path - -import abx - -from pydantic_pkgr import Binary, BinProvider - -########################################################################################### - -@abx.hookspec -@abx.hookimpl() -def get_BINPROVIDERS() -> Dict[str, BinProvider]: - return {} - -@abx.hookspec -@abx.hookimpl() -def get_BINARIES() -> Dict[str, Binary]: - return {} - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def get_BINPROVIDER(binprovider_name: str) -> BinProvider: - return abx.as_dict(abx.pm.hook.get_BINPROVIDERS())[binprovider_name] - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def get_BINARY(bin_name: str) -> BinProvider: - return abx.as_dict(abx.pm.hook.get_BINARYS())[bin_name] - - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def binary_load(binary: Binary, **kwargs) -> Binary: - loaded_binary = binary.load(**kwargs) - abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) - return loaded_binary - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def binary_install(binary: Binary, **kwargs) -> Binary: - loaded_binary = binary.install(**kwargs) - abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) - return loaded_binary - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def binary_load_or_install(binary: Binary, **kwargs) -> Binary: - loaded_binary = binary.load_or_install(**kwargs) - abx.pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) - return loaded_binary - -@abx.hookspec(firstresult=True) -@abx.hookimpl -def binary_symlink_to_bin_dir(binary: Binary, bin_dir: Path | None=None): - LIB_DIR = Path(abx.pm.hook.get_CONFIG().get('LIB_DIR', '/usr/local/share/abx')) - BIN_DIR = bin_dir or Path(abx.pm.hook.get_CONFIG().get('BIN_DIR', LIB_DIR / 'bin')) - - if not (binary.abspath and os.path.isfile(binary.abspath)): - return - - try: - BIN_DIR.mkdir(parents=True, exist_ok=True) - symlink = BIN_DIR / binary.name - symlink.unlink(missing_ok=True) - symlink.symlink_to(binary.abspath) - symlink.chmod(0o777) # make sure its executable by everyone - except Exception: - # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') - # not actually needed, we can just run without it - pass diff --git a/packages/abx-spec-searchbackend/abx_spec_searchbackend.py b/packages/abx-spec-searchbackend/abx_spec_searchbackend.py deleted file mode 100644 index 66b34114..00000000 --- a/packages/abx-spec-searchbackend/abx_spec_searchbackend.py +++ /dev/null @@ -1,29 +0,0 @@ -import abc -from typing import Iterable, List, Dict - -import abx - -@abx.hookspec -@abx.hookimpl -def get_SEARCHBACKENDS() -> Dict[abx.PluginId, 'BaseSearchBackend']: - return {} - - -class BaseSearchBackend(abc.ABC): - name: str - - 
@staticmethod - @abc.abstractmethod - def index(snapshot_id: str, texts: List[str]): - return - - @staticmethod - @abc.abstractmethod - def flush(snapshot_ids: Iterable[str]): - return - - @staticmethod - @abc.abstractmethod - def search(text: str) -> List[str]: - raise NotImplementedError("search method must be implemented by subclass") - diff --git a/packages/abx/abx.py b/packages/abx/abx.py deleted file mode 100644 index 0ce28462..00000000 --- a/packages/abx/abx.py +++ /dev/null @@ -1,344 +0,0 @@ -__package__ = 'abx' -__id__ = 'abx' -__label__ = 'ABX' -__author__ = 'Nick Sweeting' -__homepage__ = 'https://github.com/ArchiveBox' -__order__ = 0 - - -import sys -import inspect -import importlib -import itertools -from pathlib import Path -from typing import Dict, Callable, List, Set, Tuple, Iterable, Any, TypedDict, Type, cast -from types import ModuleType -from typing_extensions import Annotated -from functools import cache - -from benedict import benedict -from pydantic import AfterValidator - -from pluggy import HookspecMarker, HookimplMarker, PluginManager, HookimplOpts - -spec = hookspec = HookspecMarker("abx") -impl = hookimpl = HookimplMarker("abx") - - - -AttrName = Annotated[str, AfterValidator(lambda x: x.isidentifier() and not x.startswith('_'))] -PluginId = Annotated[str, AfterValidator(lambda x: x.isidentifier() and not x.startswith('_') and x.islower())] - -class PluginInfo(TypedDict, total=False): - id: PluginId - package: AttrName - label: str - version: str - author: str - homepage: str - dependencies: List[str] - - source_code: str - hooks: Dict[AttrName, Callable] - module: ModuleType - - - -class PatchedPluginManager(PluginManager): - """ - Patch to fix pluggy's PluginManager to work with pydantic models. - See: https://github.com/pytest-dev/pluggy/pull/536 - """ - def parse_hookimpl_opts(self, plugin, name: str) -> HookimplOpts | None: - # IMPORTANT: @property methods can have side effects, and are never hookimpl - # if attr is a property, skip it in advance - plugin_class = plugin if inspect.isclass(plugin) else type(plugin) - if isinstance(getattr(plugin_class, name, None), property): - return None - - # pydantic model fields are like attrs and also can never be hookimpls - plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__") - if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}): - # pydantic models mess with the class and attr __signature__ - # so inspect.isroutine(...) 
throws exceptions and cant be used - return None - - try: - return super().parse_hookimpl_opts(plugin, name) - except AttributeError: - return super().parse_hookimpl_opts(type(plugin), name) - -pm = PatchedPluginManager("abx") - - - -@hookspec(firstresult=True) -@hookimpl -@cache -def get_PLUGIN_ORDER(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int, Path]: - plugin_dir = None - plugin_module = None - - if isinstance(plugin, str) or isinstance(plugin, Path): - if str(plugin).endswith('.py'): - plugin_dir = Path(plugin).parent - plugin_id = plugin_dir.name - elif '/' in str(plugin): - # assume it's a path to a plugin directory - plugin_dir = Path(plugin) - plugin_id = plugin_dir.name - elif str(plugin).isidentifier(): - # assume it's a plugin_id - plugin_id = str(plugin) - - elif inspect.ismodule(plugin) or inspect.isclass(plugin): - plugin_module = plugin - plugin_dir = Path(str(plugin_module.__file__)).parent - plugin_id = plugin_dir.name - else: - raise ValueError(f'Invalid plugin, cannot get order: {plugin}') - - if plugin_dir: - try: - # if .plugin_order file exists, use it to set the load priority - order = int((plugin_dir / '.plugin_order').read_text()) - return (order, plugin_dir) - except FileNotFoundError: - pass - - if not plugin_module: - try: - plugin_module = importlib.import_module(plugin_id) - except ImportError: - raise ValueError(f'Invalid plugin, cannot get order: {plugin}') - - if plugin_module and not plugin_dir: - plugin_dir = Path(str(plugin_module.__file__)).parent - - assert plugin_dir - - return (getattr(plugin_module, '__order__', 999), plugin_dir) - -# @hookspec -# @hookimpl -# def get_PLUGIN() -> Dict[PluginId, PluginInfo]: -# """Get the info for a single plugin, implemented by each plugin""" -# return { -# __id__: PluginInfo({ -# 'id': __id__, -# 'package': str(__package__), -# 'label': __id__, -# 'version': __version__, -# 'author': __author__, -# 'homepage': __homepage__, -# 'dependencies': __dependencies__, -# }), -# } - -@hookspec(firstresult=True) -@hookimpl -@cache -def get_PLUGIN_METADATA(plugin: PluginId | ModuleType | Type) -> PluginInfo: - # TODO: remove get_PLUGIN hook in favor of pyproject.toml and __attr__s metdata - # having three methods to detect plugin metadata is overkill - - assert plugin - - # import the plugin module by its name - if isinstance(plugin, str): - module = importlib.import_module(plugin) - plugin_id = plugin - elif inspect.ismodule(plugin) or inspect.isclass(plugin): - module = plugin - plugin_id = plugin.__package__ - else: - raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}') - - assert module.__file__ - - # load the plugin info from the plugin/__init__.py __attr__s if they exist - plugin_module_attrs = { - 'id': getattr(module, '__id__', plugin_id), - 'name': getattr(module, '__id__', plugin_id), - 'label': getattr(module, '__label__', plugin_id), - 'version': getattr(module, '__version__', '0.0.1'), - 'author': getattr(module, '__author__', 'Unknown'), - 'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox'), - 'dependencies': getattr(module, '__dependencies__', []), - } - - # load the plugin info from the plugin.get_PLUGIN() hook method if it has one - plugin_info_dict = {} - if hasattr(module, 'get_PLUGIN'): - plugin_info_dict = { - key.lower(): value - for key, value in module.get_PLUGIN().items() - } - - # load the plugin info from the plugin/pyproject.toml file if it has one - plugin_toml_info = {} - try: - # try loading ./pyproject.toml 
first in case the plugin is a bare python file not inside a package dir - plugin_toml_info = benedict.from_toml((Path(module.__file__).parent / 'pyproject.toml').read_text()).project - except Exception: - try: - # try loading ../pyproject.toml next in case the plugin is in a packge dir - plugin_toml_info = benedict.from_toml((Path(module.__file__).parent.parent / 'pyproject.toml').read_text()).project - except Exception as e: - print('WARNING: could not detect pyproject.toml for PLUGIN:', plugin_id, Path(module.__file__).parent, 'ERROR:', e) - - # merge the plugin info from all sources + add dyanmically calculated info - return cast(PluginInfo, benedict(PluginInfo(**{ - 'id': plugin_id, - **plugin_module_attrs, - **plugin_info_dict, - **plugin_toml_info, - 'package': module.__package__, - 'module': module, - 'order': pm.hook.get_PLUGIN_ORDER(plugin=module), - 'source_code': module.__file__, - 'hooks': get_plugin_hooks(module), - }))) - -@hookspec(firstresult=True) -@hookimpl -def get_ALL_PLUGINS() -> Dict[PluginId, PluginInfo]: - """Get a flat dictionary of all plugins {plugin_id: {...plugin_metadata}}""" - return as_dict(pm.hook.get_PLUGIN()) - - -@hookspec(firstresult=True) -@hookimpl -def get_ALL_PLUGINS_METADATA() -> Dict[PluginId, PluginInfo]: - """Get the metadata for all the plugins registered with Pluggy.""" - plugins = {} - for plugin_module in pm.get_plugins(): - plugin_info = pm.hook.get_PLUGIN_METADATA(plugin=plugin_module) - assert 'id' in plugin_info - plugins[plugin_info['id']] = plugin_info - return benedict(plugins) - -@hookspec(firstresult=True) -@hookimpl -def get_ALL_PLUGIN_HOOK_NAMES() -> Set[str]: - """Get a set of all hook names across all plugins""" - return { - hook_name - for plugin_module in pm.get_plugins() - for hook_name in get_plugin_hooks(plugin_module) - } - -pm.add_hookspecs(sys.modules[__name__]) -pm.register(sys.modules[__name__]) - - -###### PLUGIN DISCOVERY AND LOADING ######################################################## - - - -def register_hookspecs(plugin_ids: Iterable[PluginId]): - """ - Register all the hookspecs from a list of module names. - """ - for plugin_id in plugin_ids: - hookspec_module = importlib.import_module(plugin_id) - pm.add_hookspecs(hookspec_module) - - -def find_plugins_in_dir(plugins_dir: Path) -> Dict[PluginId, Path]: - """ - Find all the plugins in a given directory. Just looks for an __init__.py file. 
- """ - return { - plugin_entrypoint.parent.name: plugin_entrypoint.parent - for plugin_entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=pm.hook.get_PLUGIN_ORDER) # type:ignore - if plugin_entrypoint.parent.name != 'abx' - } # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip" - - -def get_pip_installed_plugins(group: PluginId='abx') -> Dict[PluginId, Path]: - """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip""" - import importlib.metadata - - DETECTED_PLUGINS = {} # module_name: module_dir_path - for dist in list(importlib.metadata.distributions()): - for entrypoint in dist.entry_points: - if entrypoint.group != group or pm.is_blocked(entrypoint.name): - continue - DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent - # pm.register(plugin, name=ep.name) - # pm._plugin_distinfo.append((plugin, DistFacade(dist))) - return DETECTED_PLUGINS - - - -# Load all plugins from pip packages, archivebox built-ins, and user plugins -def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId, Path]): - """ - Load all the plugins from a dictionary of module names and directory paths. - """ - LOADED_PLUGINS = {} - for plugin in plugins: - plugin_info = pm.hook.get_PLUGIN_METADATA(plugin=plugin) - assert 'id' in plugin_info and 'module' in plugin_info - if plugin_info['module'] in pm.get_plugins(): - LOADED_PLUGINS[plugin_info['id']] = plugin_info - continue - try: - pm.add_hookspecs(plugin_info['module']) - except ValueError: - # not all plugins register new hookspecs, some only have hookimpls - pass - pm.register(plugin_info['module']) - LOADED_PLUGINS[plugin_info['id']] = plugin_info - # print(f' √ Loaded plugin: {plugin_id}') - return benedict(LOADED_PLUGINS) - -@cache -def get_plugin_hooks(plugin: PluginId | ModuleType | Type | None) -> Dict[AttrName, Callable]: - """Get all the functions marked with @hookimpl on a module.""" - if not plugin: - return {} - - hooks = {} - - if isinstance(plugin, str): - plugin_module = importlib.import_module(plugin) - elif inspect.ismodule(plugin) or inspect.isclass(plugin): - plugin_module = plugin - else: - raise ValueError(f'Invalid plugin, cannot get hooks: {plugin}') - - for attr_name in dir(plugin_module): - if attr_name.startswith('_'): - continue - try: - attr = getattr(plugin_module, attr_name) - if isinstance(attr, Callable): - if pm.parse_hookimpl_opts(plugin_module, attr_name): - hooks[attr_name] = attr - except Exception as e: - print(f'Error getting hookimpls for {plugin}: {e}') - - return hooks - - -def as_list(results) -> List[Any]: - """Flatten a list of lists returned by a pm.hook.call() into a single list""" - return list(itertools.chain(*results)) - - -def as_dict(results: Dict[str, Dict[PluginId, Any]] | List[Dict[PluginId, Any]]) -> Dict[PluginId, Any]: - """Flatten a list of dicts returned by a pm.hook.call() into a single dict""" - if isinstance(results, (dict, benedict)): - results_list = results.values() - else: - results_list = results - - return benedict({ - result_id: result - for plugin_results in results_list - for result_id, result in dict(plugin_results).items() - }) - - diff --git a/packages/archivebox-pocket/.circleci/config.yml b/packages/archivebox-pocket/.circleci/config.yml deleted file mode 100644 index a20a6aae..00000000 --- a/packages/archivebox-pocket/.circleci/config.yml +++ /dev/null @@ -1,61 +0,0 @@ -version: 2.1 -orbs: - python: circleci/python@2.0.3 - -jobs: - build_and_test_3_7: - docker: - - image: 
circleci/python:3.7 - executor: python/default - steps: - - checkout - - python/install-packages: - pkg-manager: pip - - run: - name: Run tests - command: nosetests - - build_and_test_3_8: - docker: - - image: circleci/python:3.8 - executor: python/default - steps: - - checkout - - python/install-packages: - pkg-manager: pip - - run: - name: Run tests - command: nosetests - - build_and_test_3_9: - docker: - - image: circleci/python:3.9 - executor: python/default - steps: - - checkout - - python/install-packages: - pkg-manager: pip - - run: - name: Run tests - command: nosetests - - build_and_test_3_10: - docker: - - image: circleci/python:3.10 - executor: python/default - steps: - - checkout - - python/install-packages: - pkg-manager: pip - - run: - name: Run tests - command: nosetests - - -workflows: - test_pocket: - jobs: - - build_and_test_3_7 - - build_and_test_3_8 - - build_and_test_3_9 - - build_and_test_3_10 diff --git a/packages/archivebox-pocket/.gitignore b/packages/archivebox-pocket/.gitignore deleted file mode 100644 index 8acafa3c..00000000 --- a/packages/archivebox-pocket/.gitignore +++ /dev/null @@ -1,43 +0,0 @@ -*.py[co] - -# Packages -*.egg -*.egg-info -dist -build -eggs -parts -bin -var -sdist -develop-eggs -.installed.cfg -.pypirc - -# Installer logs -pip-log.txt - -# Unit test / coverage reports -.coverage -.tox - -#Translations -*.mo - -#Mr Developer -.mr.developer.cfg - -# Virtualenv -include/ -lib/ -local/ -.Python - -# ViM files -.*.swp -.*.swo - -# Misc -*.log -*.pid -*.sql diff --git a/packages/archivebox-pocket/LICENSE.md b/packages/archivebox-pocket/LICENSE.md deleted file mode 100644 index 3b145165..00000000 --- a/packages/archivebox-pocket/LICENSE.md +++ /dev/null @@ -1,27 +0,0 @@ -Copyright (c) 2014, Tapan Pandita -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, this - list of conditions and the following disclaimer in the documentation and/or - other materials provided with the distribution. - -* Neither the name of pocket nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
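For orientation, every plugin touched by this patch follows the same shape: a package-level `__init__.py` that exposes metadata attributes plus a handful of `@abx.hookimpl` functions, which the `abx` module then discovers, orders, and registers with pluggy. The sketch below is illustrative only — the `example` plugin id, the `.config` submodule, `EXAMPLE_CONFIG`, and the `archivebox/plugins` path are placeholders, not part of this patch — but the attribute names, hook names, and loader calls are the ones defined in the `abx` code and the plugin `__init__.py` files shown in this diff.

```python
# __init__.py of a hypothetical plugin package (names are placeholders, not part of this patch)
__id__ = 'example'
__label__ = 'Example'
__version__ = '2024.10.14'
__author__ = 'ArchiveBox'
__order__ = 500                     # optional load priority, read by get_PLUGIN_ORDER()

import abx


@abx.hookimpl
def get_CONFIG():
    # each plugin contributes {plugin_id: BaseConfigSet()} to the merged config
    from .config import EXAMPLE_CONFIG    # hypothetical sibling module
    return {__id__: EXAMPLE_CONFIG}


@abx.hookimpl
def ready():
    # fired via abx.pm.hook.ready() once Django finishes starting up
    from .config import EXAMPLE_CONFIG
    EXAMPLE_CONFIG.validate()
```

Discovery and loading then work roughly like this (the plugins directory path is a placeholder):

```python
from pathlib import Path
import abx

# find built-in plugin dirs plus any pip-installed packages exposing an 'abx' entrypoint
plugins = {
    **abx.find_plugins_in_dir(Path('archivebox/plugins')),
    **abx.get_pip_installed_plugins(group='abx'),
}
abx.load_plugins(plugins)

# each hook call returns one result per plugin; as_dict()/as_list() flatten them
CONFIGS = abx.as_dict(abx.pm.hook.get_CONFIG())
```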
diff --git a/packages/archivebox-pocket/MANIFEST.in b/packages/archivebox-pocket/MANIFEST.in deleted file mode 100644 index 7425f8e8..00000000 --- a/packages/archivebox-pocket/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -include LICENSE.md -include README.md diff --git a/packages/archivebox-pocket/README.md b/packages/archivebox-pocket/README.md deleted file mode 100644 index 6b2430be..00000000 --- a/packages/archivebox-pocket/README.md +++ /dev/null @@ -1,66 +0,0 @@ -Pocket -====== -[![CircleCI](https://img.shields.io/circleci/build/github/tapanpandita/pocket/master?logo=CircleCI)](https://circleci.com/gh/tapanpandita/pocket) -[![Pypi](https://img.shields.io/pypi/v/pocket.svg)](https://pypi.python.org/pypi/pocket) -[![PyPI - Downloads](https://img.shields.io/pypi/dm/pocket.svg)](https://pypi.python.org/pypi/pocket) -![GitHub](https://img.shields.io/github/license/tapanpandita/pocket.svg) - - -A python wrapper for the [pocket api](http://getpocket.com/api/docs). - -Installation ------------- -``` -pip install pocket -``` - -Usage ------- - -You'll need your pocket consumer key. You can find this from your account page. -You will also need the access token for the account you want to modify. -Then, you need to create an instance of the pocket object - -```python -import pocket - -pocket_instance = pocket.Pocket(consumer_key, access_token) -``` - -### Chaining Modify Methods - -All the modify methods can be chained together for creating one bulk query. If you don't wish to chain the methods, just pass `wait=False`. - -```python -import pocket - -pocket_instance = pocket.Pocket(consumer_key, access_token) - -# perfoms all these actions in one request -# NOTE: Each individual method returns the instance itself. The response -# dictionary is not returned till commit is called on the instance. -response, headers = pocket_instance.archive(item_id1).archive(item_id2).favorite(item_id3).delete(item_id4).commit() - -# performs action immediately and returns a dictionary -pocket_instance.archive(item_id1, wait=False) -``` - -### OAUTH - -To get request token, use the get_request_token class method. To get the access token use the get_access_token method. - -```python -from pocket import Pocket - -request_token = Pocket.get_request_token(consumer_key=consumer_key, redirect_uri=redirect_uri) - -# URL to redirect user to, to authorize your app -auth_url = Pocket.get_auth_url(code=request_token, redirect_uri=redirect_uri) -# e.g. import subprocess; subprocess.run(['xdg-open', auth_url]) - -user_credentials = Pocket.get_credentials(consumer_key=consumer_key, code=request_token) - -access_token = user_credentials['access_token'] -``` - -For detailed documentation of the methods available, please visit the official [pocket api documentation](http://getpocket.com/api/docs). 
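Before the wrapper implementation that follows, note how the chaining described above maps onto the API: every bulk method called with `wait=True` (the default) only appends an action dict to an internal queue and returns the instance, and `commit()` flushes the whole queue in a single JSON POST to the `/v3/send` endpoint. A minimal sketch, with placeholder credentials and item ids:

```python
from pocket import Pocket

# placeholder credentials -- use a real consumer key and access token from your Pocket account
pocket_instance = Pocket('consumer-key-here', 'access-token-here')

# with the default wait=True these calls only queue actions locally and return self,
# which is what makes the chaining work
pocket_instance.archive('item-id-1').favorite('item-id-2')

# commit() sends the queued actions as one request to https://getpocket.com/v3/send
# and returns the (response, headers) tuple
response, headers = pocket_instance.commit()
```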
diff --git a/packages/archivebox-pocket/pocket.py b/packages/archivebox-pocket/pocket.py deleted file mode 100644 index b5b8d2fa..00000000 --- a/packages/archivebox-pocket/pocket.py +++ /dev/null @@ -1,366 +0,0 @@ -import requests -import json -from functools import wraps - - -class PocketException(Exception): - ''' - Base class for all pocket exceptions - http://getpocket.com/developer/docs/errors - - ''' - pass - - -class InvalidQueryException(PocketException): - pass - - -class AuthException(PocketException): - pass - - -class RateLimitException(PocketException): - ''' - http://getpocket.com/developer/docs/rate-limits - - ''' - pass - - -class ServerMaintenanceException(PocketException): - pass - -EXCEPTIONS = { - 400: InvalidQueryException, - 401: AuthException, - 403: RateLimitException, - 503: ServerMaintenanceException, -} - - -def method_wrapper(fn): - - @wraps(fn) - def wrapped(self, *args, **kwargs): - arg_names = list(fn.__code__.co_varnames) - arg_names.remove('self') - kwargs.update(dict(zip(arg_names, args))) - - url = self.api_endpoints[fn.__name__] - payload = dict([ - (k, v) for k, v in kwargs.items() - if v is not None - ]) - payload.update(self.get_payload()) - - return self.make_request(url, payload) - - return wrapped - - -def bulk_wrapper(fn): - - @wraps(fn) - def wrapped(self, *args, **kwargs): - arg_names = list(fn.__code__.co_varnames) - arg_names.remove('self') - kwargs.update(dict(zip(arg_names, args))) - - wait = kwargs.get('wait', True) - query = dict( - [(k, v) for k, v in kwargs.items() if v is not None] - ) - # TODO: Fix this hack - query['action'] = 'add' if fn.__name__ == 'bulk_add' else fn.__name__ - - if wait: - self.add_bulk_query(query) - return self - else: - url = self.api_endpoints['send'] - payload = { - 'actions': [query], - } - payload.update(self.get_payload()) - return self.make_request( - url, - json.dumps(payload), - headers={'content-type': 'application/json'}, - ) - - return wrapped - - -class Pocket(object): - ''' - This class implements a basic python wrapper around the pocket api. For a - detailed documentation of the methods and what they do please refer the - official pocket api documentation at - http://getpocket.com/developer/docs/overview - - ''' - api_endpoints = dict( - (method, 'https://getpocket.com/v3/%s' % method) - for method in "add,send,get".split(",") - ) - - statuses = { - 200: 'Request was successful', - 400: 'Invalid request, please make sure you follow the ' - 'documentation for proper syntax', - 401: 'Problem authenticating the user', - 403: 'User was authenticated, but access denied due to lack of ' - 'permission or rate limiting', - 503: 'Pocket\'s sync server is down for scheduled maintenance.', - } - - def __init__(self, consumer_key, access_token): - self.consumer_key = consumer_key - self.access_token = access_token - self._bulk_query = [] - - self._payload = { - 'consumer_key': self.consumer_key, - 'access_token': self.access_token, - } - - def get_payload(self): - return self._payload - - def add_bulk_query(self, query): - self._bulk_query.append(query) - - @staticmethod - def _post_request(url, payload, headers): - r = requests.post(url, data=payload, headers=headers) - return r - - @classmethod - def _make_request(cls, url, payload, headers=None): - r = cls._post_request(url, payload, headers) - - if r.status_code > 399: - error_msg = cls.statuses.get(r.status_code) - extra_info = r.headers.get('X-Error') - raise EXCEPTIONS.get(r.status_code, PocketException)( - '%s. 
%s' % (error_msg, extra_info) - ) - - return r.json() or r.text, r.headers - - @classmethod - def make_request(cls, url, payload, headers=None): - return cls._make_request(url, payload, headers) - - @method_wrapper - def add(self, url, title=None, tags=None, tweet_id=None): - ''' - This method allows you to add a page to a user's list. - In order to use the /v3/add endpoint, your consumer key must have the - "Add" permission. - http://getpocket.com/developer/docs/v3/add - - ''' - - @method_wrapper - def get( - self, state=None, favorite=None, tag=None, contentType=None, - sort=None, detailType=None, search=None, domain=None, since=None, - count=None, offset=None - ): - ''' - This method allows you to retrieve a user's list. It supports - retrieving items changed since a specific time to allow for syncing. - http://getpocket.com/developer/docs/v3/retrieve - - ''' - - @method_wrapper - def send(self, actions): - ''' - This method allows you to make changes to a user's list. It supports - adding new pages, marking pages as read, changing titles, or updating - tags. Multiple changes to items can be made in one request. - http://getpocket.com/developer/docs/v3/modify - - ''' - - @bulk_wrapper - def bulk_add( - self, item_id, ref_id=None, tags=None, time=None, title=None, - url=None, wait=True - ): - ''' - Add a new item to the user's list - http://getpocket.com/developer/docs/v3/modify#action_add - - ''' - - @bulk_wrapper - def archive(self, item_id, time=None, wait=True): - ''' - Move an item to the user's archive - http://getpocket.com/developer/docs/v3/modify#action_archive - - ''' - - @bulk_wrapper - def readd(self, item_id, time=None, wait=True): - ''' - Re-add (unarchive) an item to the user's list - http://getpocket.com/developer/docs/v3/modify#action_readd - - ''' - - @bulk_wrapper - def favorite(self, item_id, time=None, wait=True): - ''' - Mark an item as a favorite - http://getpocket.com/developer/docs/v3/modify#action_favorite - - ''' - - @bulk_wrapper - def unfavorite(self, item_id, time=None, wait=True): - ''' - Remove an item from the user's favorites - http://getpocket.com/developer/docs/v3/modify#action_unfavorite - - ''' - - @bulk_wrapper - def delete(self, item_id, time=None, wait=True): - ''' - Permanently remove an item from the user's account - http://getpocket.com/developer/docs/v3/modify#action_delete - - ''' - - @bulk_wrapper - def tags_add(self, item_id, tags, time=None, wait=True): - ''' - Add one or more tags to an item - http://getpocket.com/developer/docs/v3/modify#action_tags_add - - ''' - - @bulk_wrapper - def tags_remove(self, item_id, tags, time=None, wait=True): - ''' - Remove one or more tags from an item - http://getpocket.com/developer/docs/v3/modify#action_tags_remove - - ''' - - @bulk_wrapper - def tags_replace(self, item_id, tags, time=None, wait=True): - ''' - Replace all of the tags for an item with one or more provided tags - http://getpocket.com/developer/docs/v3/modify#action_tags_replace - - ''' - - @bulk_wrapper - def tags_clear(self, item_id, time=None, wait=True): - ''' - Remove all tags from an item. - http://getpocket.com/developer/docs/v3/modify#action_tags_clear - - ''' - - @bulk_wrapper - def tag_rename(self, item_id, old_tag, new_tag, time=None, wait=True): - ''' - Rename a tag. This affects all items with this tag. 
- http://getpocket.com/developer/docs/v3/modify#action_tag_rename - - ''' - - def commit(self): - ''' - This method executes the bulk query, flushes stored queries and - returns the response - - ''' - url = self.api_endpoints['send'] - payload = { - 'actions': self._bulk_query, - } - payload.update(self._payload) - self._bulk_query = [] - - return self._make_request( - url, - json.dumps(payload), - headers={'content-type': 'application/json'}, - ) - - @classmethod - def get_request_token( - cls, consumer_key, redirect_uri='http://example.com/', state=None - ): - ''' - Returns the request token that can be used to fetch the access token - - ''' - headers = { - 'X-Accept': 'application/json', - } - url = 'https://getpocket.com/v3/oauth/request' - payload = { - 'consumer_key': consumer_key, - 'redirect_uri': redirect_uri, - } - - if state: - payload['state'] = state - - return cls._make_request(url, payload, headers)[0]['code'] - - @classmethod - def get_credentials(cls, consumer_key, code): - ''' - Fetches access token from using the request token and consumer key - - ''' - headers = { - 'X-Accept': 'application/json', - } - url = 'https://getpocket.com/v3/oauth/authorize' - payload = { - 'consumer_key': consumer_key, - 'code': code, - } - - return cls._make_request(url, payload, headers)[0] - - @classmethod - def get_access_token(cls, consumer_key, code): - return cls.get_credentials(consumer_key, code)['access_token'] - - @classmethod - def get_auth_url(cls, code, redirect_uri='http://example.com'): - auth_url = ('https://getpocket.com/auth/authorize' - '?request_token=%s&redirect_uri=%s' % (code, redirect_uri)) - return auth_url - - @classmethod - def auth( - cls, consumer_key, redirect_uri='http://example.com/', state=None, - ): - ''' - This is a test method for verifying if oauth worked - http://getpocket.com/developer/docs/authentication - - ''' - code = cls.get_request_token(consumer_key, redirect_uri, state) - - auth_url = 'https://getpocket.com/auth/authorize?request_token='\ - '%s&redirect_uri=%s' % (code, redirect_uri) - raw_input( - 'Please open %s in your browser to authorize the app and ' - 'press enter:' % auth_url - ) - - return cls.get_access_token(consumer_key, code) diff --git a/packages/archivebox-pocket/pyproject.toml b/packages/archivebox-pocket/pyproject.toml deleted file mode 100644 index 6acf8a57..00000000 --- a/packages/archivebox-pocket/pyproject.toml +++ /dev/null @@ -1,19 +0,0 @@ -[project] -name = "archivebox-pocket" -version = "0.3.7" -description = " api wrapper for getpocket.com" -readme = "README.md" -requires-python = ">=3.10" -dependencies = [ - "requests>=2.32.3", -] - -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[tool.hatch.build.targets.sdist] -packages = ["."] - -[tool.hatch.build.targets.wheel] -packages = ["."] diff --git a/packages/archivebox-pocket/requirements.txt b/packages/archivebox-pocket/requirements.txt deleted file mode 100644 index 9598beea..00000000 --- a/packages/archivebox-pocket/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -coverage==3.7.1 -mock==1.0.1 -nose==1.3.0 -requests==2.20.0 diff --git a/packages/archivebox-pocket/setup.py b/packages/archivebox-pocket/setup.py deleted file mode 100644 index 5a5baba0..00000000 --- a/packages/archivebox-pocket/setup.py +++ /dev/null @@ -1,41 +0,0 @@ -from setuptools import setup - -setup( - name = "pocket", # pip install pocket - description = "api wrapper for getpocket.com", - #long_description=open('README.md', 'rt').read(), - - # version - # third part for 
minor release - # second when api changes - # first when it becomes stable someday - version = "0.3.7", - author = 'Tapan Pandita', - author_email = "tapan.pandita@gmail.com", - - url = 'http://github.com/tapanpandita/pocket/', - license = 'BSD', - - # as a practice no need to hard code version unless you know program wont - # work unless the specific versions are used - install_requires = ["requests>=2.32.3"], - - py_modules = ["pocket"], - - zip_safe = True, -) - -# TODO: Do all this and delete these lines -# register: Create an accnt on pypi, store your credentials in ~/.pypirc: -# -# [pypirc] -# servers = -# pypi -# -# [server-login] -# username: -# password: -# -# $ python setup.py register # one time only, will create pypi page for pocket -# $ python setup.py sdist --formats=gztar,zip upload # create a new release -# diff --git a/packages/archivebox-pocket/test_pocket.py b/packages/archivebox-pocket/test_pocket.py deleted file mode 100644 index 14e67f53..00000000 --- a/packages/archivebox-pocket/test_pocket.py +++ /dev/null @@ -1,52 +0,0 @@ -import unittest -import pocket -from mock import patch - - -class PocketTest(unittest.TestCase): - - def setUp(self): - self.consumer_key = 'consumer_key' - self.access_token = 'access_token' - - def tearDown(self): - pass - - def test_pocket_init(self): - pocket_instance = pocket.Pocket( - self.consumer_key, - self.access_token, - ) - - self.assertEqual(self.consumer_key, pocket_instance.consumer_key) - self.assertEqual(self.access_token, pocket_instance.access_token) - - def test_pocket_init_payload(self): - pocket_instance = pocket.Pocket( - self.consumer_key, - self.access_token, - ) - expected_payload = { - 'consumer_key': self.consumer_key, - 'access_token': self.access_token, - } - - self.assertEqual(expected_payload, pocket_instance._payload) - - def test_post_request(self): - mock_payload = { - 'consumer_key': self.consumer_key, - 'access_token': self.access_token, - } - mock_url = 'https://getpocket.com/v3/' - mock_headers = { - 'content-type': 'application/json', - } - - with patch('pocket.requests') as mock_requests: - pocket.Pocket._post_request(mock_url, mock_payload, mock_headers) - mock_requests.post.assert_called_once_with( - mock_url, - data=mock_payload, - headers=mock_headers, - ) diff --git a/pyproject.toml b/pyproject.toml index de870ada..58e7d82b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,19 +69,22 @@ dependencies = [ "typeid-python>=0.3.1", "psutil>=6.0.0", "supervisor>=4.2.5", - "python-crontab>=3.2.0", # for: archivebox schedule - "croniter>=3.0.3", # for: archivebox schedule - "ipython>=8.27.0", # for: archivebox shell - "py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid + "python-crontab>=3.2.0", # for: archivebox schedule + "croniter>=3.0.3", # for: archivebox schedule + "ipython>=8.27.0", # for: archivebox shell + "py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid "python-benedict[io,parse]>=0.33.2", "pydantic-settings>=2.5.2", "atomicwrites==1.4.1", "django-taggit==6.1.0", "base32-crockford==0.3.0", + "platformdirs>=4.3.6", + ############# Plugin Dependencies ################ # "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", "pydantic-pkgr>=0.5.4", - ############# Plugin Dependencies ################ + "abx>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-config>=0.1.0", "abx-spec-archivebox>=0.1.0", @@ -90,15 +93,34 @@ dependencies = [ "abx-spec-searchbackend>=0.1.0", "abx-plugin-default-binproviders>=2024.10.24", - 
"abx-plugin-pip-binprovider>=2024.10.24", - "abx-plugin-npm-binprovider>=2024.10.24", - "abx-plugin-playwright-binprovider>=2024.10.24", + "abx-plugin-pip>=2024.10.24", + "abx-plugin-npm>=2024.10.24", + "abx-plugin-playwright>=2024.10.24", + "abx-plugin-puppeteer>=2024.10.28", + + "abx-plugin-ripgrep-search>=2024.10.28", + "abx-plugin-sqlitefts-search>=2024.10.28", + "abx-plugin-sonic-search>=2024.10.28", + "abx-plugin-ldap-auth>=2024.10.28", + + "abx-plugin-curl>=2024.10.27", + "abx-plugin-wget>=2024.10.28", + "abx-plugin-git>=2024.10.28", + "abx-plugin-chrome>=2024.10.28", + "abx-plugin-ytdlp>=2024.10.28", + + "abx-plugin-title>=2024.10.27", + "abx-plugin-favicon>=2024.10.27", + # "abx-plugin-headers>=2024.10.27", + "abx-plugin-archivedotorg>=2024.10.28", + + "abx-plugin-singlefile>=2024.10.28", + "abx-plugin-readability>=2024.10.28", + "abx-plugin-mercury>=2024.10.28", + "abx-plugin-htmltotext>=2024.10.28", - # "abx-plugin-pocket", - # "abx-plugin-sonic", - # "abx-plugin-yt-dlp", "sonic-client>=1.0.0", - "yt-dlp>=2024.8.6", # for: media" + "yt-dlp>=2024.8.6", # for: media" ] [project.optional-dependencies] @@ -160,15 +182,38 @@ abx-spec-extractor = { workspace = true } abx-spec-searchbackend = { workspace = true } abx-plugin-default-binproviders = { workspace = true } -abx-plugin-pip-binprovider = { workspace = true } -abx-plugin-npm-binprovider = { workspace = true } -abx-plugin-playwright-binprovider = { workspace = true } +abx-plugin-pip = { workspace = true } +abx-plugin-npm = { workspace = true } +abx-plugin-playwright = { workspace = true } +abx-plugin-puppeteer = { workspace = true } +abx-plugin-ripgrep-search = { workspace = true } +abx-plugin-sqlitefts-search = { workspace = true } +abx-plugin-sonic-search = { workspace = true } +abx-plugin-ldap-auth = { workspace = true } + +abx-plugin-curl = { workspace = true } +abx-plugin-wget = { workspace = true } +abx-plugin-git = { workspace = true } +abx-plugin-chrome = { workspace = true } +abx-plugin-ytdlp = { workspace = true } + +abx-plugin-title = { workspace = true } +abx-plugin-favicon = { workspace = true } +# abx-plugin-headers = { workspace = true } +abx-plugin-archivedotorg = { workspace = true } + +abx-plugin-singlefile = { workspace = true } +abx-plugin-readability = { workspace = true } +abx-plugin-mercury = { workspace = true } +abx-plugin-htmltotext = { workspace = true } + pydantic-pkgr = { workspace = true } -archivebox-pocket = { workspace = true } +pocket = { workspace = true } [tool.uv.workspace] -members = ["packages/*"] +members = ["archivebox/vendor/*"] +exclude = ["archivebox/vendor/__pycache__"] [build-system] requires = ["pdm-backend"] @@ -183,7 +228,7 @@ package-dir = {"archivebox" = "archivebox"} line-length = 140 target-version = "py310" src = ["archivebox"] -exclude = ["*.pyi", "typings/", "migrations/", "vendor/"] +exclude = ["*.pyi", "typings/", "migrations/", "vendor/pocket"] # https://docs.astral.sh/ruff/rules/ [tool.ruff.lint] @@ -218,7 +263,7 @@ exclude = [ "**/node_modules", "**/__pycache__", "**/migrations", - "archivebox/vendor", + "archivebox/vendor/pocket", ] stubPath = "./archivebox/typings" venvPath = "." 
From d47d429e9db9b37ca0dedb9ad1242067c8f5e50f Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 16:12:43 -0700 Subject: [PATCH 06/25] add placeholder pyproj --- archivebox/vendor/pocket | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/archivebox/vendor/pocket b/archivebox/vendor/pocket index e7970b63..b377c089 160000 --- a/archivebox/vendor/pocket +++ b/archivebox/vendor/pocket @@ -1 +1 @@ -Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5 +Subproject commit b377c08988fb8ff81a6fdcd4f53ec54948fc16c5 From d93aa469497ed21ce50655cfb5b83401c97035d2 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 18:47:45 -0700 Subject: [PATCH 07/25] fix django.forms.JSONField does not exist 500 error --- archivebox/__init__.py | 27 ++++++++++++------------- archivebox/config/version.py | 12 +++++++---- archivebox/core/admin_archiveresults.py | 4 ++-- archivebox/core/views.py | 1 - 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/archivebox/__init__.py b/archivebox/__init__.py index 69df1876..24863926 100755 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -13,9 +13,9 @@ __package__ = 'archivebox' import os import sys - from pathlib import Path from typing import cast + ASCII_LOGO = """ β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•—β–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ•—β–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•— β–ˆβ–ˆβ•— β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β•β•β•β•β•β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β•β•β• β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β•β•β•β–ˆβ–ˆβ•—β•šβ–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β• @@ -51,14 +51,13 @@ from .vendor import load_vendored_libs # noqa load_vendored_libs() # print('DONE LOADING VENDORED LIBRARIES') - +# Load ABX Plugin Specifications + Default Implementations import abx # noqa import abx_spec_archivebox # noqa -import abx_spec_config # noqa +import abx_spec_config # noqa import abx_spec_pydantic_pkgr # noqa -import abx_spec_django # noqa -import abx_spec_searchbackend # noqa - +import abx_spec_django # noqa +import abx_spec_searchbackend # noqa abx.pm.add_hookspecs(abx_spec_config.PLUGIN_SPEC) abx.pm.register(abx_spec_config.PLUGIN_SPEC()) @@ -72,30 +71,30 @@ abx.pm.register(abx_spec_django.PLUGIN_SPEC()) abx.pm.add_hookspecs(abx_spec_searchbackend.PLUGIN_SPEC) abx.pm.register(abx_spec_searchbackend.PLUGIN_SPEC()) - +# Cast to ArchiveBoxPluginSpec to enable static type checking of pm.hook.call() methods abx.pm = cast(abx.ABXPluginManager[abx_spec_archivebox.ArchiveBoxPluginSpec], abx.pm) pm = abx.pm -# Load all installed ABX-compatible plugins +# Load all pip-installed ABX-compatible plugins ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx') -# Load all ArchiveBox-specific plugins + +# Load all built-in ArchiveBox plugins ARCHIVEBOX_BUILTIN_PLUGINS = { 'config': PACKAGE_DIR / 'config', 'core': PACKAGE_DIR / 'core', # 'search': PACKAGE_DIR / 'search', # 'core': PACKAGE_DIR / 'core', } + # Load all user-defined ArchiveBox plugins USER_PLUGINS = abx.find_plugins_in_dir(Path(os.getcwd()) / 'user_plugins') -# Merge all plugins together + +# Import all plugins and register them with ABX Plugin Manager ALL_PLUGINS = {**ABX_ECOSYSTEM_PLUGINS, **ARCHIVEBOX_BUILTIN_PLUGINS, **USER_PLUGINS} - - -# Load ArchiveBox plugins LOADED_PLUGINS = abx.load_plugins(ALL_PLUGINS) - +# Setup basic config, constants, paths, and version from .config.constants import CONSTANTS # noqa from .config.paths import 
PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa from .config.version import VERSION # noqa diff --git a/archivebox/config/version.py b/archivebox/config/version.py index 26df4592..026bfa64 100644 --- a/archivebox/config/version.py +++ b/archivebox/config/version.py @@ -45,7 +45,7 @@ def detect_installed_version(PACKAGE_DIR: Path=PACKAGE_DIR): @cache def get_COMMIT_HASH() -> Optional[str]: try: - git_dir = PACKAGE_DIR / '../.git' + git_dir = PACKAGE_DIR.parent / '.git' ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1] commit_hash = git_dir.joinpath(ref).read_text().strip() return commit_hash @@ -53,7 +53,7 @@ def get_COMMIT_HASH() -> Optional[str]: pass try: - return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip() + return list((PACKAGE_DIR.parent / '.git/refs/heads/').glob('*'))[0].read_text().strip() except Exception: pass @@ -62,8 +62,12 @@ def get_COMMIT_HASH() -> Optional[str]: @cache def get_BUILD_TIME() -> str: if IN_DOCKER: - docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0] - return docker_build_end_time + try: + # if we're in the archivebox official docker image, /VERSION.txt will contain the build time + docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0] + return docker_build_end_time + except Exception: + pass src_last_modified_unix_timestamp = (PACKAGE_DIR / 'README.md').stat().st_mtime return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s') diff --git a/archivebox/core/admin_archiveresults.py b/archivebox/core/admin_archiveresults.py index aff7b1df..675f5f43 100644 --- a/archivebox/core/admin_archiveresults.py +++ b/archivebox/core/admin_archiveresults.py @@ -8,7 +8,7 @@ from django.utils.html import format_html, mark_safe from django.core.exceptions import ValidationError from django.urls import reverse, resolve from django.utils import timezone -from django.forms import forms +from django_jsonform.forms.fields import JSONFormField from huey_monitor.admin import TaskModel @@ -83,7 +83,7 @@ class ArchiveResultInline(admin.TabularInline): formset.form.base_fields['cmd_version'].initial = '-' formset.form.base_fields['pwd'].initial = str(snapshot.link_dir) formset.form.base_fields['created_by'].initial = request.user - formset.form.base_fields['cmd'] = forms.JSONField(initial=['-']) + formset.form.base_fields['cmd'] = JSONFormField(initial=['-']) formset.form.base_fields['output'].initial = 'Manually recorded cmd output...' 
if obj is not None: diff --git a/archivebox/core/views.py b/archivebox/core/views.py index bb1c234f..a56f93bc 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -12,7 +12,6 @@ from django.views import View from django.views.generic.list import ListView from django.views.generic import FormView from django.db.models import Q -from django.conf import settings from django.contrib import messages from django.contrib.auth.mixins import UserPassesTestMixin from django.views.decorators.csrf import csrf_exempt From a5d99b87b90e2d5cfe0b8e214e07c7ec6c1bfe5b Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 18:47:58 -0700 Subject: [PATCH 08/25] add more plugins --- click_test.py | 4 +- uv.lock | 592 ++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 431 insertions(+), 165 deletions(-) diff --git a/click_test.py b/click_test.py index 52d1d6e1..4e4a0e40 100644 --- a/click_test.py +++ b/click_test.py @@ -5,8 +5,6 @@ from archivebox.config.django import setup_django setup_django() -import abx.archivebox.writes - def parse_stdin_to_args(io=sys.stdin): for line in io.read().split('\n'): @@ -25,7 +23,7 @@ if not sys.stdin.isatty(): def extract(snapshot_ids_or_urls): for url_or_snapshot_id in snapshot_ids_or_urls: print('- EXTRACTING', url_or_snapshot_id, file=sys.stderr) - for result in abx.archivebox.writes.extract(url_or_snapshot_id): + for result in archivebox.pm.hook.extract(url_or_snapshot_id): print(result) if __name__ == "__main__": diff --git a/uv.lock b/uv.lock index e4d6e7e4..87085d19 100644 --- a/uv.lock +++ b/uv.lock @@ -9,43 +9,44 @@ resolution-markers = [ [manifest] members = [ "abx", - "abx-archivedotorg-extractor", - "abx-chrome-extractor", - "abx-curl-extractor", - "abx-favicon-extractor", - "abx-git-extractor", - "abx-htmltotext-extractor", - "abx-ldap-auth", - "abx-mercury-extractor", + "abx-plugin-archivedotorg", + "abx-plugin-chrome", + "abx-plugin-curl", "abx-plugin-default-binproviders", - "abx-plugin-npm-binprovider", - "abx-plugin-pip-binprovider", - "abx-plugin-playwright-binprovider", - "abx-pocket-extractor", - "abx-puppeteer-binprovider", - "abx-readability-extractor", - "abx-readwise-extractor", - "abx-ripgrep-search", - "abx-singlefile-extractor", - "abx-sonic-search", + "abx-plugin-favicon", + "abx-plugin-git", + "abx-plugin-htmltotext", + "abx-plugin-ldap-auth", + "abx-plugin-mercury", + "abx-plugin-npm", + "abx-plugin-pip", + "abx-plugin-playwright", + "abx-plugin-pocket", + "abx-plugin-puppeteer", + "abx-plugin-readability", + "abx-plugin-readwise", + "abx-plugin-ripgrep-search", + "abx-plugin-singlefile", + "abx-plugin-sonic-search", + "abx-plugin-sqlitefts-search", + "abx-plugin-title", + "abx-plugin-wget", + "abx-plugin-ytdlp", "abx-spec-archivebox", "abx-spec-config", "abx-spec-django", "abx-spec-extractor", "abx-spec-pydantic-pkgr", "abx-spec-searchbackend", - "abx-sqlitefts-search", - "abx-wget-extractor", - "abx-ytdlp-extractor", "archivebox", - "archivebox-pocket", + "pocket", "pydantic-pkgr", ] [[package]] name = "abx" version = "0.1.0" -source = { editable = "packages/abx" } +source = { editable = "archivebox/vendor/abx" } dependencies = [ { name = "django" }, { name = "pluggy" }, @@ -58,49 +59,60 @@ requires-dist = [ ] [[package]] -name = "abx-archivedotorg-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-archivedotorg-extractor" } +name = "abx-plugin-archivedotorg" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-archivedotorg" } +dependencies = [ + { 
name = "abx" }, + { name = "abx-plugin-curl" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/vendor/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, +] [[package]] -name = "abx-chrome-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-chrome-extractor" } +name = "abx-plugin-chrome" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-chrome" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, +] [[package]] -name = "abx-curl-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-curl-extractor" } +name = "abx-plugin-curl" +version = "2024.10.24" +source = { editable = "archivebox/vendor/abx-plugin-curl" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, +] -[[package]] -name = "abx-favicon-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-favicon-extractor" } - -[[package]] -name = "abx-git-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-git-extractor" } - -[[package]] -name = "abx-htmltotext-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-htmltotext-extractor" } - -[[package]] -name = "abx-ldap-auth" -version = "0.1.0" -source = { editable = "packages/abx-plugin-ldap-auth" } - -[[package]] -name = "abx-mercury-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-mercury-extractor" } +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, +] [[package]] name = "abx-plugin-default-binproviders" version = "2024.10.24" -source = { editable = "packages/abx-plugin-default-binproviders" } +source = { editable = "archivebox/vendor/abx-plugin-default-binproviders" } dependencies = [ { name = "abx" }, { name = "abx-spec-pydantic-pkgr" }, @@ -109,15 +121,98 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, - { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, - { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, ] [[package]] -name = "abx-plugin-npm-binprovider" +name = "abx-plugin-favicon" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-favicon" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-curl" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/vendor/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, +] + 
+[[package]] +name = "abx-plugin-git" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-git" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-default-binproviders" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-htmltotext" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-htmltotext" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-ldap-auth" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-ldap-auth" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-django" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-django", editable = "archivebox/vendor/abx-spec-django" }, +] + +[[package]] +name = "abx-plugin-mercury" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-mercury" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-npm" version = "2024.10.24" -source = { editable = "packages/abx-plugin-npm-binprovider" } +source = { editable = "archivebox/vendor/abx-plugin-npm" } dependencies = [ { name = "abx" }, { name = "abx-plugin-default-binproviders" }, @@ -128,17 +223,17 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, - { name = "abx-plugin-default-binproviders", editable = "packages/abx-plugin-default-binproviders" }, - { name = "abx-spec-config", editable = "packages/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, - { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, ] [[package]] -name = "abx-plugin-pip-binprovider" +name = "abx-plugin-pip" version = "2024.10.24" -source = { editable = "packages/abx-plugin-pip-binprovider" } +source = { editable = "archivebox/vendor/abx-plugin-pip" } dependencies = [ { name = "abx" }, { name = "abx-plugin-default-binproviders" }, @@ -150,18 +245,18 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, - { name = "abx-plugin-default-binproviders", 
editable = "packages/abx-plugin-default-binproviders" }, - { name = "abx-spec-config", editable = "packages/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, { name = "django", specifier = ">=5.0.0" }, - { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, ] [[package]] -name = "abx-plugin-playwright-binprovider" -version = "2024.10.24" -source = { editable = "packages/abx-plugin-playwright-binprovider" } +name = "abx-plugin-playwright" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-playwright" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -172,52 +267,210 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, - { name = "abx-spec-config", editable = "packages/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, { name = "pydantic", specifier = ">=2.4.2" }, - { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, ] [[package]] -name = "abx-pocket-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-pocket-extractor" } +name = "abx-plugin-pocket" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-pocket" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "pocket" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "pocket", virtual = "archivebox/vendor/pocket" }, +] [[package]] -name = "abx-puppeteer-binprovider" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-puppeteer-binprovider" } +name = "abx-plugin-puppeteer" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-puppeteer" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, +] [[package]] -name = "abx-readability-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-readability-extractor" } +name = "abx-plugin-readability" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-readability" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = 
"abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, +] [[package]] -name = "abx-readwise-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-readwise-extractor" } +name = "abx-plugin-readwise" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-readwise" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, +] [[package]] -name = "abx-ripgrep-search" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-ripgrep-search" } +name = "abx-plugin-ripgrep-search" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-ripgrep-search" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-searchbackend" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, +] [[package]] -name = "abx-singlefile-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-singlefile-extractor" } +name = "abx-plugin-singlefile" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-singlefile" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, +] [[package]] -name = "abx-sonic-search" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-sonic-search" } +name = "abx-plugin-sonic-search" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-sonic-search" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-sqlitefts-search" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-sqlitefts-search" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-searchbackend" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, +] + +[[package]] +name = "abx-plugin-title" +version = "2024.10.27" +source = { editable = "archivebox/vendor/abx-plugin-title" } +dependencies = [ + { name = "abx" }, + { name = 
"abx-plugin-curl" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/vendor/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-wget" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-wget" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-ytdlp" +version = "2024.10.28" +source = { editable = "archivebox/vendor/abx-plugin-ytdlp" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, +] [[package]] name = "abx-spec-archivebox" version = "0.1.0" -source = { editable = "packages/abx-spec-archivebox" } +source = { editable = "archivebox/vendor/abx-spec-archivebox" } dependencies = [ { name = "abx" }, { name = "django" }, @@ -225,14 +478,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, + { name = "abx", editable = "archivebox/vendor/abx" }, { name = "django", specifier = ">=5.1.1,<6.0" }, ] [[package]] name = "abx-spec-config" -version = "0.0.1" -source = { editable = "packages/abx-spec-config" } +version = "0.1.0" +source = { editable = "archivebox/vendor/abx-spec-config" } dependencies = [ { name = "abx" }, { name = "pydantic" }, @@ -243,7 +496,7 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, + { name = "abx", editable = "archivebox/vendor/abx" }, { name = "pydantic", specifier = ">=2.9.2" }, { name = "pydantic-settings", specifier = ">=2.6.0" }, { name = "python-benedict", specifier = ">=0.34.0" }, @@ -253,7 +506,7 @@ requires-dist = [ [[package]] name = "abx-spec-django" version = "0.1.0" -source = { editable = "packages/abx-spec-django" } +source = { editable = "archivebox/vendor/abx-spec-django" } dependencies = [ { name = "abx" }, { name = "django" }, @@ -261,14 +514,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, + { name = "abx", editable = "archivebox/vendor/abx" }, { name = "django", specifier = ">=5.1.1,<6.0" }, ] [[package]] name = "abx-spec-extractor" version = "0.1.0" -source = { editable = "packages/abx-spec-extractor" } +source = { editable = "archivebox/vendor/abx-spec-extractor" } dependencies = [ { name = "abx" }, { name = "pydantic" }, @@ -277,7 +530,7 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, + { name = "abx", editable = "archivebox/vendor/abx" }, { name = "pydantic", specifier = ">=2.5.0" }, { name = "python-benedict", specifier = ">=0.26.0" }, ] @@ -285,7 +538,7 @@ requires-dist = [ [[package]] name = 
"abx-spec-pydantic-pkgr" version = "0.1.0" -source = { editable = "packages/abx-spec-pydantic-pkgr" } +source = { editable = "archivebox/vendor/abx-spec-pydantic-pkgr" } dependencies = [ { name = "abx" }, { name = "pydantic-pkgr" }, @@ -293,14 +546,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, - { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, ] [[package]] name = "abx-spec-searchbackend" version = "0.1.0" -source = { editable = "packages/abx-spec-searchbackend" } +source = { editable = "archivebox/vendor/abx-spec-searchbackend" } dependencies = [ { name = "abx" }, { name = "pydantic" }, @@ -309,26 +562,11 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, + { name = "abx", editable = "archivebox/vendor/abx" }, { name = "pydantic", specifier = ">=2.5.0" }, { name = "python-benedict", specifier = ">=0.26.0" }, ] -[[package]] -name = "abx-sqlitefts-search" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-sqlitefts-search" } - -[[package]] -name = "abx-wget-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-wget-extractor" } - -[[package]] -name = "abx-ytdlp-extractor" -version = "0.1.0" -source = { virtual = "packages/abx-plugin-ytdlp-extractor" } - [[package]] name = "alabaster" version = "1.0.0" @@ -411,10 +649,27 @@ version = "0.8.5rc53" source = { editable = "." } dependencies = [ { name = "abx" }, + { name = "abx-plugin-archivedotorg" }, + { name = "abx-plugin-chrome" }, + { name = "abx-plugin-curl" }, { name = "abx-plugin-default-binproviders" }, - { name = "abx-plugin-npm-binprovider" }, - { name = "abx-plugin-pip-binprovider" }, - { name = "abx-plugin-playwright-binprovider" }, + { name = "abx-plugin-favicon" }, + { name = "abx-plugin-git" }, + { name = "abx-plugin-htmltotext" }, + { name = "abx-plugin-ldap-auth" }, + { name = "abx-plugin-mercury" }, + { name = "abx-plugin-npm" }, + { name = "abx-plugin-pip" }, + { name = "abx-plugin-playwright" }, + { name = "abx-plugin-puppeteer" }, + { name = "abx-plugin-readability" }, + { name = "abx-plugin-ripgrep-search" }, + { name = "abx-plugin-singlefile" }, + { name = "abx-plugin-sonic-search" }, + { name = "abx-plugin-sqlitefts-search" }, + { name = "abx-plugin-title" }, + { name = "abx-plugin-wget" }, + { name = "abx-plugin-ytdlp" }, { name = "abx-spec-archivebox" }, { name = "abx-spec-config" }, { name = "abx-spec-django" }, @@ -442,6 +697,7 @@ dependencies = [ { name = "feedparser" }, { name = "ipython" }, { name = "mypy-extensions" }, + { name = "platformdirs" }, { name = "pluggy" }, { name = "psutil" }, { name = "py-machineid" }, @@ -500,17 +756,34 @@ dev = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "packages/abx" }, - { name = "abx-plugin-default-binproviders", editable = "packages/abx-plugin-default-binproviders" }, - { name = "abx-plugin-npm-binprovider", editable = "packages/abx-plugin-npm-binprovider" }, - { name = "abx-plugin-pip-binprovider", editable = "packages/abx-plugin-pip-binprovider" }, - { name = "abx-plugin-playwright-binprovider", editable = "packages/abx-plugin-playwright-binprovider" }, - { name = "abx-spec-archivebox", editable = "packages/abx-spec-archivebox" }, - { name = "abx-spec-config", editable = "packages/abx-spec-config" }, - { name = "abx-spec-django", editable = 
"packages/abx-spec-django" }, - { name = "abx-spec-extractor", editable = "packages/abx-spec-extractor" }, - { name = "abx-spec-pydantic-pkgr", editable = "packages/abx-spec-pydantic-pkgr" }, - { name = "abx-spec-searchbackend", editable = "packages/abx-spec-searchbackend" }, + { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx-plugin-archivedotorg", editable = "archivebox/vendor/abx-plugin-archivedotorg" }, + { name = "abx-plugin-chrome", editable = "archivebox/vendor/abx-plugin-chrome" }, + { name = "abx-plugin-curl", editable = "archivebox/vendor/abx-plugin-curl" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, + { name = "abx-plugin-favicon", editable = "archivebox/vendor/abx-plugin-favicon" }, + { name = "abx-plugin-git", editable = "archivebox/vendor/abx-plugin-git" }, + { name = "abx-plugin-htmltotext", editable = "archivebox/vendor/abx-plugin-htmltotext" }, + { name = "abx-plugin-ldap-auth", editable = "archivebox/vendor/abx-plugin-ldap-auth" }, + { name = "abx-plugin-mercury", editable = "archivebox/vendor/abx-plugin-mercury" }, + { name = "abx-plugin-npm", editable = "archivebox/vendor/abx-plugin-npm" }, + { name = "abx-plugin-pip", editable = "archivebox/vendor/abx-plugin-pip" }, + { name = "abx-plugin-playwright", editable = "archivebox/vendor/abx-plugin-playwright" }, + { name = "abx-plugin-puppeteer", editable = "archivebox/vendor/abx-plugin-puppeteer" }, + { name = "abx-plugin-readability", editable = "archivebox/vendor/abx-plugin-readability" }, + { name = "abx-plugin-ripgrep-search", editable = "archivebox/vendor/abx-plugin-ripgrep-search" }, + { name = "abx-plugin-singlefile", editable = "archivebox/vendor/abx-plugin-singlefile" }, + { name = "abx-plugin-sonic-search", editable = "archivebox/vendor/abx-plugin-sonic-search" }, + { name = "abx-plugin-sqlitefts-search", editable = "archivebox/vendor/abx-plugin-sqlitefts-search" }, + { name = "abx-plugin-title", editable = "archivebox/vendor/abx-plugin-title" }, + { name = "abx-plugin-wget", editable = "archivebox/vendor/abx-plugin-wget" }, + { name = "abx-plugin-ytdlp", editable = "archivebox/vendor/abx-plugin-ytdlp" }, + { name = "abx-spec-archivebox", editable = "archivebox/vendor/abx-spec-archivebox" }, + { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx-spec-django", editable = "archivebox/vendor/abx-spec-django" }, + { name = "abx-spec-extractor", editable = "archivebox/vendor/abx-spec-extractor" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, { name = "archivebox", extras = ["sonic", "ldap"], marker = "extra == 'all'" }, { name = "atomicwrites", specifier = "==1.4.1" }, { name = "base32-crockford", specifier = "==0.3.0" }, @@ -534,10 +807,11 @@ requires-dist = [ { name = "feedparser", specifier = ">=6.0.11" }, { name = "ipython", specifier = ">=8.27.0" }, { name = "mypy-extensions", specifier = ">=1.0.0" }, + { name = "platformdirs", specifier = ">=4.3.6" }, { name = "pluggy", specifier = ">=1.5.0" }, { name = "psutil", specifier = ">=6.0.0" }, { name = "py-machineid", specifier = ">=0.6.0" }, - { name = "pydantic-pkgr", editable = "packages/pydantic-pkgr" }, + { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, { name = "pydantic-settings", specifier = ">=2.5.2" }, { name = "python-benedict", extras = ["io", "parse"], specifier = 
">=0.33.2" }, { name = "python-crontab", specifier = ">=3.2.0" }, @@ -581,17 +855,6 @@ dev = [ { name = "wheel", specifier = ">=0.44.0" }, ] -[[package]] -name = "archivebox-pocket" -version = "0.3.7" -source = { editable = "packages/archivebox-pocket" } -dependencies = [ - { name = "requests" }, -] - -[package.metadata] -requires-dist = [{ name = "requests", specifier = ">=2.32.3" }] - [[package]] name = "asgiref" version = "3.8.1" @@ -2247,6 +2510,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, ] +[[package]] +name = "pocket" +version = "0.3.7" +source = { virtual = "archivebox/vendor/pocket" } + [[package]] name = "prompt-toolkit" version = "3.0.48" @@ -2465,7 +2733,7 @@ wheels = [ [[package]] name = "pydantic-pkgr" version = "0.5.4" -source = { editable = "packages/pydantic-pkgr" } +source = { editable = "archivebox/vendor/pydantic-pkgr" } dependencies = [ { name = "platformdirs" }, { name = "pydantic" }, @@ -2497,7 +2765,7 @@ requires-dist = [ { name = "platformdirs", specifier = ">=4.3.6" }, { name = "pydantic", specifier = ">=2.7.1" }, { name = "pydantic-core", specifier = ">=2.18.2" }, - { name = "pydantic-pkgr", extras = ["pyinfra", "ansible"], marker = "extra == 'all'", editable = "packages/pydantic-pkgr" }, + { name = "pydantic-pkgr", extras = ["pyinfra", "ansible"], marker = "extra == 'all'", editable = "archivebox/vendor/pydantic-pkgr" }, { name = "pyinfra", marker = "extra == 'pyinfra'", specifier = ">=2.6.1" }, { name = "typing-extensions", specifier = ">=4.11.0" }, ] @@ -3296,14 +3564,14 @@ wheels = [ [[package]] name = "typeguard" -version = "4.3.0" +version = "4.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8d/e1/3178b3e5369a98239ed7301e3946747048c66f4023163d55918f11b82d4e/typeguard-4.3.0.tar.gz", hash = "sha256:92ee6a0aec9135181eae6067ebd617fd9de8d75d714fb548728a4933b1dea651", size = 73374 } +sdist = { url = "https://files.pythonhosted.org/packages/79/5a/91b7c8cfc2e96962442abc9d65c650436dd831910b4d7878980d6596fb98/typeguard-4.4.0.tar.gz", hash = "sha256:463bd8697a65a4aa576a63767c369b1ecfba8a5ba735edfe3223127b6ecfa28c", size = 74399 } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/de/be0ba39ee73760bf33329b7c6f95bc67e96593c69c881671e312538e24bb/typeguard-4.3.0-py3-none-any.whl", hash = "sha256:4d24c5b39a117f8a895b9da7a9b3114f04eb63bade45a4492de49b175b6f7dfa", size = 35385 }, + { url = "https://files.pythonhosted.org/packages/61/a3/00203767544b597a9e3c57b29a84967b3230f00bdd9aa6a52a73187043b4/typeguard-4.4.0-py3-none-any.whl", hash = "sha256:8ca34c14043f53b2caae7040549ba431770869bcd6287cfa8239db7ecb882b4a", size = 35736 }, ] [[package]] From 70926f1d9fe725aa31fd4a8ff8c367eb6060f6c3 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 19:06:38 -0700 Subject: [PATCH 09/25] replace os.access with os.path.isdir --- archivebox/core/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/archivebox/core/views.py b/archivebox/core/views.py index a56f93bc..e425c8fe 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -68,7 +68,7 @@ class SnapshotView(View): and embed_path and os.access(abs_path, os.R_OK) and abs_path.exists()): - if abs_path.is_dir() and not 
any(abs_path.glob('*.*')): + if os.path.isdir(abs_path) and not any(abs_path.glob('*.*')): continue result_info = { @@ -102,7 +102,7 @@ class SnapshotView(View): # iterate through all the files in the snapshot dir and add the biggest ones to1 the result list snap_dir = Path(snapshot.link_dir) - assert os.access(snap_dir, os.R_OK) and os.access(snap_dir, os.X_OK) + assert os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK) for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')): extension = result_file.suffix.lstrip('.').lower() From 6530d1f4bf578d31d5ce2261993c13c1c4f4c304 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 19:56:49 -0700 Subject: [PATCH 10/25] remove vendored copy of pocket and add [debug] group of pkgs for running with DEBUG=True --- archivebox/vendor/__init__.py | 4 +++- archivebox/vendor/pocket | 1 - pyproject.toml | 29 ++++++++++++++++------------- 3 files changed, 19 insertions(+), 15 deletions(-) delete mode 160000 archivebox/vendor/pocket diff --git a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py index de31354a..3b120cba 100644 --- a/archivebox/vendor/__init__.py +++ b/archivebox/vendor/__init__.py @@ -7,14 +7,16 @@ VENDOR_DIR = Path(__file__).parent VENDORED_LIBS = [ 'abx', 'pydantic-pkgr', - 'pocket', ] +# scan ./vendor and add all dirs present to list of available VENDORED_LIBS for subdir in reversed(sorted(VENDOR_DIR.iterdir())): if subdir.is_dir() and subdir.name not in VENDORED_LIBS and not subdir.name.startswith('_'): VENDORED_LIBS.append(subdir.name) + def load_vendored_libs(): + """Add archivebox/vendor to sys.path and import all vendored libraries present within""" if str(VENDOR_DIR) not in sys.path: sys.path.append(str(VENDOR_DIR)) diff --git a/archivebox/vendor/pocket b/archivebox/vendor/pocket deleted file mode 160000 index b377c089..00000000 --- a/archivebox/vendor/pocket +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b377c08988fb8ff81a6fdcd4f53ec54948fc16c5 diff --git a/pyproject.toml b/pyproject.toml index 58e7d82b..3fe42605 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,10 +79,11 @@ dependencies = [ "django-taggit==6.1.0", "base32-crockford==0.3.0", "platformdirs>=4.3.6", - ############# Plugin Dependencies ################ - # "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", "pydantic-pkgr>=0.5.4", - + "pocket>=0.3.6", + "sonic-client>=1.0.0", + "yt-dlp>=2024.8.6", # for: media" + ############# Plugin Dependencies ################ "abx>=0.1.0", "abx-spec-pydantic-pkgr>=0.1.0", @@ -118,9 +119,6 @@ dependencies = [ "abx-plugin-readability>=2024.10.28", "abx-plugin-mercury>=2024.10.28", "abx-plugin-htmltotext>=2024.10.28", - - "sonic-client>=1.0.0", - "yt-dlp>=2024.8.6", # for: media" ] [project.optional-dependencies] @@ -136,8 +134,15 @@ ldap = [ "python-ldap>=3.4.3", "django-auth-ldap>=4.1.0", ] +debug = [ + # libs needed to run archivebox server with DEBUG=True + "django-debug-toolbar>=4.4.6", + "requests-tracker>=0.3.3", + "djdt_flamegraph>=0.2.13", + "ipdb>=0.13.13", +] all = [ - "archivebox[sonic,ldap]" + "archivebox[sonic,ldap,debug]" ] [tool.uv] @@ -154,9 +159,9 @@ dev-dependencies = [ "sphinx-rtd-theme>=2.0.0", ### DEBUGGING "django-debug-toolbar>=4.4.6", + "requests-tracker>=0.3.3", "djdt_flamegraph>=0.2.13", "ipdb>=0.13.13", - "requests-tracker>=0.3.3", "logfire[django]>=0.51.0", "opentelemetry-instrumentation-django>=0.47b0", "opentelemetry-instrumentation-sqlite3>=0.47b0", @@ -173,6 +178,8 @@ dev-dependencies = [ ] [tool.uv.sources] +pydantic-pkgr = { workspace = true } 
+ abx = { workspace = true } abx-spec-pydantic-pkgr = { workspace = true } abx-spec-config = { workspace = true } @@ -208,9 +215,6 @@ abx-plugin-mercury = { workspace = true } abx-plugin-htmltotext = { workspace = true } -pydantic-pkgr = { workspace = true } -pocket = { workspace = true } - [tool.uv.workspace] members = ["archivebox/vendor/*"] exclude = ["archivebox/vendor/__pycache__"] @@ -228,7 +232,7 @@ package-dir = {"archivebox" = "archivebox"} line-length = 140 target-version = "py310" src = ["archivebox"] -exclude = ["*.pyi", "typings/", "migrations/", "vendor/pocket"] +exclude = ["*.pyi", "typings/", "migrations/"] # https://docs.astral.sh/ruff/rules/ [tool.ruff.lint] @@ -263,7 +267,6 @@ exclude = [ "**/node_modules", "**/__pycache__", "**/migrations", - "archivebox/vendor/pocket", ] stubPath = "./archivebox/typings" venvPath = "." From 001056f29275935d4e34e61ebac5b64c35dba609 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 20:00:21 -0700 Subject: [PATCH 11/25] remove vendored copy of pydantic-pkgr --- .gitmodules | 6 ------ archivebox/vendor/__init__.py | 2 +- archivebox/vendor/pydantic-pkgr | 1 - pyproject.toml | 2 +- 4 files changed, 2 insertions(+), 9 deletions(-) delete mode 160000 archivebox/vendor/pydantic-pkgr diff --git a/.gitmodules b/.gitmodules index db744b8a..e260fdf5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,3 @@ [submodule "docs"] path = docs url = https://github.com/ArchiveBox/ArchiveBox.wiki.git -[submodule "archivebox/vendor/pocket"] - path = archivebox/vendor/pocket - url = https://github.com/tapanpandita/pocket -[submodule "archivebox/vendor/pydantic-pkgr"] - path = archivebox/vendor/pydantic-pkgr - url = https://github.com/ArchiveBox/pydantic-pkgr diff --git a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py index 3b120cba..e2e97a7c 100644 --- a/archivebox/vendor/__init__.py +++ b/archivebox/vendor/__init__.py @@ -6,7 +6,7 @@ VENDOR_DIR = Path(__file__).parent VENDORED_LIBS = [ 'abx', - 'pydantic-pkgr', + # 'pydantic-pkgr', ] # scan ./vendor and add all dirs present to list of available VENDORED_LIBS diff --git a/archivebox/vendor/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr deleted file mode 160000 index a116eaef..00000000 --- a/archivebox/vendor/pydantic-pkgr +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a116eaef7f090dc872b18e82b5a538313075ded6 diff --git a/pyproject.toml b/pyproject.toml index 3fe42605..065bff61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -178,7 +178,7 @@ dev-dependencies = [ ] [tool.uv.sources] -pydantic-pkgr = { workspace = true } +# pydantic-pkgr = { workspace = true } abx = { workspace = true } abx-spec-pydantic-pkgr = { workspace = true } From 7d7586765026ab39192caef299c8a164e791505d Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 20:01:11 -0700 Subject: [PATCH 12/25] bump rc version since there have been tons of changes --- pyproject.toml | 2 +- uv.lock | 459 ++++--------------------------------------------- 2 files changed, 39 insertions(+), 422 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 065bff61..632cc166 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "archivebox" -version = "0.8.5rc53" +version = "0.8.6rc0" requires-python = ">=3.10" description = "Self-hosted internet archiving solution." 
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}] diff --git a/uv.lock b/uv.lock index 87085d19..349d5bd2 100644 --- a/uv.lock +++ b/uv.lock @@ -39,8 +39,6 @@ members = [ "abx-spec-pydantic-pkgr", "abx-spec-searchbackend", "archivebox", - "pocket", - "pydantic-pkgr", ] [[package]] @@ -123,7 +121,7 @@ dependencies = [ requires-dist = [ { name = "abx", editable = "archivebox/vendor/abx" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -227,7 +225,7 @@ requires-dist = [ { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -250,7 +248,7 @@ requires-dist = [ { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, { name = "django", specifier = ">=5.0.0" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -271,7 +269,7 @@ requires-dist = [ { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, { name = "pydantic", specifier = ">=2.4.2" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -288,7 +286,7 @@ dependencies = [ requires-dist = [ { name = "abx", editable = "archivebox/vendor/abx" }, { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "pocket", virtual = "archivebox/vendor/pocket" }, + { name = "pocket", specifier = ">=0.3.6" }, ] [[package]] @@ -307,7 +305,7 @@ requires-dist = [ { name = "abx", editable = "archivebox/vendor/abx" }, { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -373,7 +371,7 @@ requires-dist = [ { name = "abx", editable = "archivebox/vendor/abx" }, { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -394,7 +392,7 @@ requires-dist = [ { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -464,7 +462,7 @@ requires-dist = [ { name = "abx", editable = 
"archivebox/vendor/abx" }, { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -547,7 +545,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] @@ -585,49 +583,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] -[[package]] -name = "ansible" -version = "10.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "ansible-core" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d7/23/ae30b280ebad1f19fa012c0410aaf7d50cd741a5786bd60a2ecba42d2cd4/ansible-10.5.0.tar.gz", hash = "sha256:ba2045031a7d60c203b6e5fe1f8eaddd53ae076f7ada910e636494384135face", size = 40391062 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2e/33/4cb64286f44cd36753cd15ef636be6c9e40be331e14e97caca74cb7a3242/ansible-10.5.0-py3-none-any.whl", hash = "sha256:1d10bddba58f1edd0fe0b8e0387e0fafc519535066bb3c919c33b6ea3ec32a0f", size = 48977627 }, -] - -[[package]] -name = "ansible-core" -version = "2.17.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cryptography" }, - { name = "jinja2" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "resolvelib" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/39/96/02a6d1d16ef3b08d53e23db519fbb31641b2767404b674f3ea71c7c1ac3b/ansible_core-2.17.5.tar.gz", hash = "sha256:ae7f51fd13dc9d57c9bcd43ef23f9c255ca8f18f4b5c0011a4f9b724d92c5a8e", size = 3097858 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/4f/5c344dc52327766fb286771d492481c2c60eace9697497b250e1d79b1e40/ansible_core-2.17.5-py3-none-any.whl", hash = "sha256:10f165b475cf2bc8d886e532cadb32c52ee6a533649793101d3166bca9bd3ea3", size = 2193938 }, -] - -[[package]] -name = "ansible-runner" -version = "2.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, - { name = "pexpect" }, - { name = "python-daemon" }, - { name = "pyyaml" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e0/b4/842698d5c17b3cae7948df4c812e01f4199dfb9f35b1c0bb51cf2fe5c246/ansible-runner-2.4.0.tar.gz", hash = "sha256:82d02b2548830f37a53517b65c823c4af371069406c7d213b5c9041d45e0c5b6", size = 148802 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/58/46/44577e2e58de8b9c9398e1ee08b6c697bb2581446209cbfd2639cced66f5/ansible_runner-2.4.0-py3-none-any.whl", hash = "sha256:a3f592ae4cdfa62a72ad15de60da9c8210f376d67f495c4a78d4cf1dc7ccdf89", size = 79678 }, -] - [[package]] name = "anyio" version = "4.6.2.post1" @@ -699,6 +654,7 @@ dependencies = [ { name = "mypy-extensions" }, { name = "platformdirs" }, { name = "pluggy" }, + { name = "pocket" }, { name = "psutil" }, { name = "py-machineid" }, { name = "pydantic-pkgr" }, @@ -721,7 +677,17 @@ dependencies = [ [package.optional-dependencies] all = [ { name = "django-auth-ldap" }, + { name = "django-debug-toolbar" }, + { name = "djdt-flamegraph" }, 
+ { name = "ipdb" }, { name = "python-ldap" }, + { name = "requests-tracker" }, +] +debug = [ + { name = "django-debug-toolbar" }, + { name = "djdt-flamegraph" }, + { name = "ipdb" }, + { name = "requests-tracker" }, ] ldap = [ { name = "django-auth-ldap" }, @@ -784,7 +750,7 @@ requires-dist = [ { name = "abx-spec-extractor", editable = "archivebox/vendor/abx-spec-extractor" }, { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, - { name = "archivebox", extras = ["sonic", "ldap"], marker = "extra == 'all'" }, + { name = "archivebox", extras = ["sonic", "ldap", "debug"], marker = "extra == 'all'" }, { name = "atomicwrites", specifier = "==1.4.1" }, { name = "base32-crockford", specifier = "==0.3.0" }, { name = "channels", extras = ["daphne"], specifier = ">=4.1.0" }, @@ -794,6 +760,7 @@ requires-dist = [ { name = "django-admin-data-views", specifier = ">=0.4.1" }, { name = "django-auth-ldap", marker = "extra == 'ldap'", specifier = ">=4.1.0" }, { name = "django-charid-field", specifier = ">=0.4" }, + { name = "django-debug-toolbar", marker = "extra == 'debug'", specifier = ">=4.4.6" }, { name = "django-extensions", specifier = ">=3.2.3" }, { name = "django-huey", specifier = ">=1.2.1" }, { name = "django-huey-monitor", specifier = ">=0.9.0" }, @@ -804,19 +771,23 @@ requires-dist = [ { name = "django-signal-webhooks", specifier = ">=0.3.0" }, { name = "django-stubs", specifier = ">=5.0.4" }, { name = "django-taggit", specifier = "==6.1.0" }, + { name = "djdt-flamegraph", marker = "extra == 'debug'", specifier = ">=0.2.13" }, { name = "feedparser", specifier = ">=6.0.11" }, + { name = "ipdb", marker = "extra == 'debug'", specifier = ">=0.13.13" }, { name = "ipython", specifier = ">=8.27.0" }, { name = "mypy-extensions", specifier = ">=1.0.0" }, { name = "platformdirs", specifier = ">=4.3.6" }, { name = "pluggy", specifier = ">=1.5.0" }, + { name = "pocket", specifier = ">=0.3.6" }, { name = "psutil", specifier = ">=6.0.0" }, { name = "py-machineid", specifier = ">=0.6.0" }, - { name = "pydantic-pkgr", editable = "archivebox/vendor/pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, { name = "pydantic-settings", specifier = ">=2.5.2" }, { name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" }, { name = "python-crontab", specifier = ">=3.2.0" }, { name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" }, { name = "requests", specifier = ">=2.32.3" }, + { name = "requests-tracker", marker = "extra == 'debug'", specifier = ">=0.3.3" }, { name = "rich", specifier = ">=13.8.0" }, { name = "rich-argparse", specifier = ">=1.5.2" }, { name = "setuptools", specifier = ">=74.1.0" }, @@ -936,38 +907,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/6f/7ad1176c56c920e9841b14923f81545a4243876628312f143915561770d2/base32_crockford-0.3.0-py2.py3-none-any.whl", hash = "sha256:295ef5ffbf6ed96b6e739ffd36be98fa7e90a206dd18c39acefb15777eedfe6e", size = 5050 }, ] -[[package]] -name = "bcrypt" -version = "4.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/7e/d95e7d96d4828e965891af92e43b52a4cd3395dc1c1ef4ee62748d0471d0/bcrypt-4.2.0.tar.gz", hash = "sha256:cf69eaf5185fd58f268f805b505ce31f9b9fc2d64b376642164e9244540c1221", size = 24294 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/a9/81/4e8f5bc0cd947e91fb720e1737371922854da47a94bc9630454e7b2845f8/bcrypt-4.2.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:096a15d26ed6ce37a14c1ac1e48119660f21b24cba457f160a4b830f3fe6b5cb", size = 471568 }, - { url = "https://files.pythonhosted.org/packages/05/d2/1be1e16aedec04bcf8d0156e01b987d16a2063d38e64c3f28030a3427d61/bcrypt-4.2.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c02d944ca89d9b1922ceb8a46460dd17df1ba37ab66feac4870f6862a1533c00", size = 277372 }, - { url = "https://files.pythonhosted.org/packages/e3/96/7a654027638ad9b7589effb6db77eb63eba64319dfeaf9c0f4ca953e5f76/bcrypt-4.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d84cf6d877918620b687b8fd1bf7781d11e8a0998f576c7aa939776b512b98d", size = 273488 }, - { url = "https://files.pythonhosted.org/packages/46/54/dc7b58abeb4a3d95bab653405935e27ba32f21b812d8ff38f271fb6f7f55/bcrypt-4.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1bb429fedbe0249465cdd85a58e8376f31bb315e484f16e68ca4c786dcc04291", size = 277759 }, - { url = "https://files.pythonhosted.org/packages/ac/be/da233c5f11fce3f8adec05e8e532b299b64833cc962f49331cdd0e614fa9/bcrypt-4.2.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:655ea221910bcac76ea08aaa76df427ef8625f92e55a8ee44fbf7753dbabb328", size = 273796 }, - { url = "https://files.pythonhosted.org/packages/b0/b8/8b4add88d55a263cf1c6b8cf66c735280954a04223fcd2880120cc767ac3/bcrypt-4.2.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:1ee38e858bf5d0287c39b7a1fc59eec64bbf880c7d504d3a06a96c16e14058e7", size = 311082 }, - { url = "https://files.pythonhosted.org/packages/7b/76/2aa660679abbdc7f8ee961552e4bb6415a81b303e55e9374533f22770203/bcrypt-4.2.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:0da52759f7f30e83f1e30a888d9163a81353ef224d82dc58eb5bb52efcabc399", size = 305912 }, - { url = "https://files.pythonhosted.org/packages/00/03/2af7c45034aba6002d4f2b728c1a385676b4eab7d764410e34fd768009f2/bcrypt-4.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3698393a1b1f1fd5714524193849d0c6d524d33523acca37cd28f02899285060", size = 325185 }, - { url = "https://files.pythonhosted.org/packages/dc/5d/6843443ce4ab3af40bddb6c7c085ed4a8418b3396f7a17e60e6d9888416c/bcrypt-4.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:762a2c5fb35f89606a9fde5e51392dad0cd1ab7ae64149a8b935fe8d79dd5ed7", size = 335188 }, - { url = "https://files.pythonhosted.org/packages/cb/4c/ff8ca83d816052fba36def1d24e97d9a85739b9bbf428c0d0ecd296a07c8/bcrypt-4.2.0-cp37-abi3-win32.whl", hash = "sha256:5a1e8aa9b28ae28020a3ac4b053117fb51c57a010b9f969603ed885f23841458", size = 156481 }, - { url = "https://files.pythonhosted.org/packages/65/f1/e09626c88a56cda488810fb29d5035f1662873777ed337880856b9d204ae/bcrypt-4.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:8f6ede91359e5df88d1f5c1ef47428a4420136f3ce97763e31b86dd8280fbdf5", size = 151336 }, - { url = "https://files.pythonhosted.org/packages/96/86/8c6a84daed4dd878fbab094400c9174c43d9b838ace077a2f8ee8bc3ae12/bcrypt-4.2.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:c52aac18ea1f4a4f65963ea4f9530c306b56ccd0c6f8c8da0c06976e34a6e841", size = 472414 }, - { url = "https://files.pythonhosted.org/packages/f6/05/e394515f4e23c17662e5aeb4d1859b11dc651be01a3bd03c2e919a155901/bcrypt-4.2.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bbbfb2734f0e4f37c5136130405332640a1e46e6b23e000eeff2ba8d005da68", size = 277599 }, - { url = 
"https://files.pythonhosted.org/packages/4b/3b/ad784eac415937c53da48983756105d267b91e56aa53ba8a1b2014b8d930/bcrypt-4.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3413bd60460f76097ee2e0a493ccebe4a7601918219c02f503984f0a7ee0aebe", size = 273491 }, - { url = "https://files.pythonhosted.org/packages/cc/14/b9ff8e0218bee95e517b70e91130effb4511e8827ac1ab00b4e30943a3f6/bcrypt-4.2.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8d7bb9c42801035e61c109c345a28ed7e84426ae4865511eb82e913df18f58c2", size = 277934 }, - { url = "https://files.pythonhosted.org/packages/3e/d0/31938bb697600a04864246acde4918c4190a938f891fd11883eaaf41327a/bcrypt-4.2.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3d3a6d28cb2305b43feac298774b997e372e56c7c7afd90a12b3dc49b189151c", size = 273804 }, - { url = "https://files.pythonhosted.org/packages/e7/c3/dae866739989e3f04ae304e1201932571708cb292a28b2f1b93283e2dcd8/bcrypt-4.2.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:9c1c4ad86351339c5f320ca372dfba6cb6beb25e8efc659bedd918d921956bae", size = 311275 }, - { url = "https://files.pythonhosted.org/packages/5d/2c/019bc2c63c6125ddf0483ee7d914a405860327767d437913942b476e9c9b/bcrypt-4.2.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:27fe0f57bb5573104b5a6de5e4153c60814c711b29364c10a75a54bb6d7ff48d", size = 306355 }, - { url = "https://files.pythonhosted.org/packages/75/fe/9e137727f122bbe29771d56afbf4e0dbc85968caa8957806f86404a5bfe1/bcrypt-4.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8ac68872c82f1add6a20bd489870c71b00ebacd2e9134a8aa3f98a0052ab4b0e", size = 325381 }, - { url = "https://files.pythonhosted.org/packages/1a/d4/586b9c18a327561ea4cd336ff4586cca1a7aa0f5ee04e23a8a8bb9ca64f1/bcrypt-4.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cb2a8ec2bc07d3553ccebf0746bbf3d19426d1c6d1adbd4fa48925f66af7b9e8", size = 335685 }, - { url = "https://files.pythonhosted.org/packages/24/55/1a7127faf4576138bb278b91e9c75307490178979d69c8e6e273f74b974f/bcrypt-4.2.0-cp39-abi3-win32.whl", hash = "sha256:77800b7147c9dc905db1cba26abe31e504d8247ac73580b4aa179f98e6608f34", size = 155857 }, - { url = "https://files.pythonhosted.org/packages/1c/2a/c74052e54162ec639266d91539cca7cbf3d1d3b8b36afbfeaee0ea6a1702/bcrypt-4.2.0-cp39-abi3-win_amd64.whl", hash = "sha256:61ed14326ee023917ecd093ee6ef422a72f3aec6f07e21ea5f10622b735538a9", size = 151717 }, - { url = "https://files.pythonhosted.org/packages/09/97/01026e7b1b7f8aeb41514408eca1137c0f8aef9938335e3bc713f82c282e/bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:39e1d30c7233cfc54f5c3f2c825156fe044efdd3e0b9d309512cc514a263ec2a", size = 275924 }, - { url = "https://files.pythonhosted.org/packages/ca/46/03eb26ea3e9c12ca18d1f3bf06199f7d72ce52e68f2a1ebcfd8acff9c472/bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f4f4acf526fcd1c34e7ce851147deedd4e26e6402369304220250598b26448db", size = 272242 }, -] - [[package]] name = "beautifulsoup4" version = "4.12.3" @@ -1257,18 +1196,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bf/9b/08c0432272d77b04803958a4598a51e2a4b51c06640af8b8f0f908c18bf2/charset_normalizer-3.4.0-py3-none-any.whl", hash = "sha256:fe9f97feb71aa9896b81973a7bbada8c49501dc73e58a10fcef6663af95e5079", size = 49446 }, ] -[[package]] -name = "click" -version = "8.1.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "platform_system == 'Windows'" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", size = 97941 }, -] - [[package]] name = "colorama" version = "0.4.6" @@ -1287,15 +1214,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b1/92/dfd892312d822f36c55366118b95d914e5f16de11044a27cf10a7d71bbbf/commonmark-0.9.1-py2.py3-none-any.whl", hash = "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9", size = 51068 }, ] -[[package]] -name = "configparser" -version = "7.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a5/2e/a8d83652990ecb5df54680baa0c53d182051d9e164a25baa0582363841d1/configparser-7.1.0.tar.gz", hash = "sha256:eb82646c892dbdf773dae19c633044d163c3129971ae09b49410a303b8e0a5f7", size = 50122 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ee/df/1514580907b0bac0970415e5e24ef96a9c1fa71dcf2aa0139045b58fae9a/configparser-7.1.0-py3-none-any.whl", hash = "sha256:98e374573c4e10e92399651e3ba1c47a438526d633c44ee96143dec26dad4299", size = 17074 }, -] - [[package]] name = "constantly" version = "23.10.4" @@ -1401,15 +1319,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/8d/778b7d51b981a96554f29136cd59ca7880bf58094338085bcf2a979a0e6a/Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c", size = 9561 }, ] -[[package]] -name = "distro" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, -] - [[package]] name = "django" version = "5.1.2" @@ -1727,53 +1636,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/0f/d8a8152e720cbcad890e56ee98639ff489f1992869b4cf304c3fa24d4bcc/ftfy-6.3.0-py3-none-any.whl", hash = "sha256:17aca296801f44142e3ff2c16f93fbf6a87609ebb3704a9a41dd5d4903396caf", size = 44778 }, ] -[[package]] -name = "gevent" -version = "24.10.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi", marker = "platform_python_implementation == 'CPython' and sys_platform == 'win32'" }, - { name = "greenlet", marker = "platform_python_implementation == 'CPython'" }, - { name = "zope-event" }, - { name = "zope-interface" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/70/f0/be10ed5d7721ed2317d7feb59e167603217156c2a6d57f128523e24e673d/gevent-24.10.3.tar.gz", hash = "sha256:aa7ee1bd5cabb2b7ef35105f863b386c8d5e332f754b60cfc354148bd70d35d1", size = 6108837 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/6f/a2100e7883c7bdfc2b45cb60b310ca748762a21596258b9dd01c5c093dbc/gevent-24.10.3-cp310-cp310-macosx_11_0_universal2.whl", hash = 
"sha256:d7a1ad0f2da582f5bd238bca067e1c6c482c30c15a6e4d14aaa3215cbb2232f3", size = 3014382 }, - { url = "https://files.pythonhosted.org/packages/7a/b1/460e4884ed6185d9eb9c4c2e9639d2b254197e46513301c0f63dec22dc90/gevent-24.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4e526fdc279c655c1e809b0c34b45844182c2a6b219802da5e411bd2cf5a8ad", size = 4853460 }, - { url = "https://files.pythonhosted.org/packages/ca/f6/7ded98760d381229183ecce8db2edcce96f13e23807d31a90c66dae85304/gevent-24.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57a5c4e0bdac482c5f02f240d0354e61362df73501ef6ebafce8ef635cad7527", size = 4977636 }, - { url = "https://files.pythonhosted.org/packages/7d/21/7b928e6029eedb93ef94fc0aee701f497af2e601f0ec00aac0e72e3f450e/gevent-24.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d67daed8383326dc8b5e58d88e148d29b6b52274a489e383530b0969ae7b9cb9", size = 5058031 }, - { url = "https://files.pythonhosted.org/packages/00/98/12c03fd004fbeeca01276ffc589f5a368fd741d02582ab7006d1bdef57e7/gevent-24.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e24ffea72e27987979c009536fd0868e52239b44afe6cf7135ce8aafd0f108e", size = 6683694 }, - { url = "https://files.pythonhosted.org/packages/64/4c/ea14d971452d3da09e49267e052d8312f112c7835120aed78d22ef14efee/gevent-24.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c1d80090485da1ea3d99205fe97908b31188c1f4857f08b333ffaf2de2e89d18", size = 5286063 }, - { url = "https://files.pythonhosted.org/packages/39/3f/397efff27e637d7306caa00d1560512c44028c25c70be1e72c46b79b1b66/gevent-24.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0c129f81d60cda614acb4b0c5731997ca05b031fb406fcb58ad53a7ade53b13", size = 6817462 }, - { url = "https://files.pythonhosted.org/packages/aa/5d/19939eaa7c5b7c0f37e0a0665a911ddfe1e35c25c512446fc356a065c16e/gevent-24.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:26ca7a6b42d35129617025ac801135118333cad75856ffc3217b38e707383eba", size = 1566631 }, - { url = "https://files.pythonhosted.org/packages/6e/01/1be5cf013826d8baae235976d6a94f3628014fd2db7c071aeec13f82b4d1/gevent-24.10.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:68c3a0d8402755eba7f69022e42e8021192a721ca8341908acc222ea597029b6", size = 2966909 }, - { url = "https://files.pythonhosted.org/packages/fe/3e/7fa9ab023f24d8689e2c77951981f8ea1f25089e0349a0bf8b35ee9b9277/gevent-24.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d850a453d66336272be4f1d3a8126777f3efdaea62d053b4829857f91e09755", size = 4913247 }, - { url = "https://files.pythonhosted.org/packages/db/63/6e40eaaa3c2abd1561faff11dc3e6781f8c25e975354b8835762834415af/gevent-24.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e58ee3723f1fbe07d66892f1caa7481c306f653a6829b6fd16cb23d618a5915", size = 5049036 }, - { url = "https://files.pythonhosted.org/packages/94/89/158bc32cdc898dda0481040ac18650022e73133d93460c5af56ca622fe9a/gevent-24.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b52382124eca13135a3abe4f65c6bd428656975980a48e51b17aeab68bdb14db", size = 5107299 }, - { url = "https://files.pythonhosted.org/packages/64/91/1abe62ee350fdfac186d33f615d0d3a0b3b140e7ccf23c73547aa0deec44/gevent-24.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ca2266e08f43c0e22c028801dff7d92a0b102ef20e4caeb6a46abfb95f6a328", size = 6819625 }, - { url = 
"https://files.pythonhosted.org/packages/92/8b/0b2fe0d36b7c4d463e46cc68eaf6c14488bd7d86cc37e995c64a0ff7d02f/gevent-24.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d758f0d4dbf32502ec87bb9b536ca8055090a16f8305f0ada3ce6f34e70f2fd7", size = 5474079 }, - { url = "https://files.pythonhosted.org/packages/12/7b/9f5abbf0021a50321314f850697e0f46d2e5081168223af2d8544af9d19f/gevent-24.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0de6eb3d55c03138fda567d9bfed28487ce5d0928c5107549767a93efdf2be26", size = 6901323 }, - { url = "https://files.pythonhosted.org/packages/8a/63/607715c621ae78ed581b7ba36d076df63feeb352993d521327f865056771/gevent-24.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:385710355eadecdb70428a5ae3e7e5a45dcf888baa1426884588be9d25ac4290", size = 1549468 }, - { url = "https://files.pythonhosted.org/packages/d9/e4/4edbe17001bb3e6fade4ad2d85ca8f9e4eabcbde4aa29aa6889281616e3e/gevent-24.10.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3ad8fb70aa0ebc935729c9699ac31b210a49b689a7b27b7ac9f91676475f3f53", size = 2970952 }, - { url = "https://files.pythonhosted.org/packages/3c/a6/ce0824fe9398ba6b00028a74840f12be1165d5feaacdc028ea953db3d6c3/gevent-24.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f18689f7a70d2ed0e75bad5036ec3c89690a493d4cfac8d7cdb258ac04b132bd", size = 5172230 }, - { url = "https://files.pythonhosted.org/packages/25/d4/9002cfb585bfa52c860ed4b1349d1a6400bdf2df9f1bd21df5ff33eea33c/gevent-24.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7f4f171d4d2018170454d84c934842e1b5f6ce7468ba298f6e7f7cff15000a3", size = 5338394 }, - { url = "https://files.pythonhosted.org/packages/0c/98/222f1a14f22ad2d1cbcc37edb74095264c1f9c7ab49e6423693383462b8a/gevent-24.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7021e26d70189b33c27173d4173f27bf4685d6b6f1c0ea50e5335f8491cb110c", size = 5437989 }, - { url = "https://files.pythonhosted.org/packages/bf/e8/cbb46afea3c7ecdc7289e15cb4a6f89903f4f9754a27ca320d3e465abc78/gevent-24.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34aea15f9c79f27a8faeaa361bc1e72c773a9b54a1996a2ec4eefc8bcd59a824", size = 6838539 }, - { url = "https://files.pythonhosted.org/packages/69/c3/e43e348f23da404a6d4368a14453ed097cdfca97d5212eaceb987d04a0e1/gevent-24.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:8af65a4d4feaec6042c666d22c322a310fba3b47e841ad52f724b9c3ce5da48e", size = 5513842 }, - { url = "https://files.pythonhosted.org/packages/c2/76/84b7c19c072a80900118717a85236859127d630cdf8b079fe42f19649f12/gevent-24.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:89c4115e3f5ada55f92b61701a46043fe42f702b5af863b029e4c1a76f6cc2d4", size = 6927374 }, - { url = "https://files.pythonhosted.org/packages/5e/69/0ab1b04c363547058fb5035275c144957b80b36cb6aee715fe6181b0cee9/gevent-24.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:1ce6dab94c0b0d24425ba55712de2f8c9cb21267150ca63f5bb3a0e1f165da99", size = 1546701 }, - { url = "https://files.pythonhosted.org/packages/f7/2d/c783583d7999cd2f2e7aa2d6a1c333d663003ca61255a89ff6a891be95f4/gevent-24.10.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:f147e38423fbe96e8731f60a63475b3d2cab2f3d10578d8ee9d10c507c58a2ff", size = 2962857 }, - { url = "https://files.pythonhosted.org/packages/f3/77/d3ce96fd49406f61976e9a3b6c742b97bb274d3b30c68ff190c5b5f81afd/gevent-24.10.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:18e6984ec96fc95fd67488555c38ece3015be1f38b1bcceb27b7d6c36b343008", size = 5141676 }, - { url = "https://files.pythonhosted.org/packages/49/f4/f99f893770c316b9d2f03bd684947126cbed0321b89fe5423838974c2025/gevent-24.10.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:051b22e2758accfddb0457728bfc9abf8c3f2ce6bca43f1ff6e07b5ed9e49bf4", size = 5310248 }, - { url = "https://files.pythonhosted.org/packages/e3/0c/67257ba906f76ed82e8f0bd8c00c2a0687b360a1050b70db7e58dff749ab/gevent-24.10.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb5edb6433764119a664bbb148d2aea9990950aa89cc3498f475c2408d523ea3", size = 5407304 }, - { url = "https://files.pythonhosted.org/packages/35/6c/3a72da7c224b0111728130c0f1abc3ee07feff91b37e0ea83db98f4a3eaf/gevent-24.10.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce417bcaaab496bc9c77f75566531e9d93816262037b8b2dbb88b0fdcd66587c", size = 6818624 }, - { url = "https://files.pythonhosted.org/packages/a3/96/cc5f6ecba032a45fc312fe0db2908a893057fd81361eea93845d6c325556/gevent-24.10.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:1c3a828b033fb02b7c31da4d75014a1f82e6c072fc0523456569a57f8b025861", size = 5484356 }, - { url = "https://files.pythonhosted.org/packages/7c/97/e680b2b2f0c291ae4db9813ffbf02c22c2a0f14c8f1a613971385e29ef67/gevent-24.10.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:f2ae3efbbd120cdf4a68b7abc27a37e61e6f443c5a06ec2c6ad94c37cd8471ec", size = 6903191 }, - { url = "https://files.pythonhosted.org/packages/1b/1c/b4181957da062d1c060974ec6cb798cc24aeeb28e8cd2ece84eb4b4991f7/gevent-24.10.3-cp313-cp313-win_amd64.whl", hash = "sha256:9e1210334a9bc9f76c3d008e0785ca62214f8a54e1325f6c2ecab3b6a572a015", size = 1545117 }, - { url = "https://files.pythonhosted.org/packages/89/2b/bf4af9950b8f9abd5b4025858f6311930de550e3498bbfeb47c914701a1d/gevent-24.10.3-pp310-pypy310_pp73-macosx_11_0_universal2.whl", hash = "sha256:e534e6a968d74463b11de6c9c67f4b4bf61775fb00f2e6e0f7fcdd412ceade18", size = 1271541 }, -] - [[package]] name = "googleapis-common-protos" version = "1.65.0" @@ -1786,57 +1648,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/08/49bfe7cf737952cc1a9c43e80cc258ed45dad7f183c5b8276fc94cb3862d/googleapis_common_protos-1.65.0-py2.py3-none-any.whl", hash = "sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63", size = 220890 }, ] -[[package]] -name = "greenlet" -version = "3.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2f/ff/df5fede753cc10f6a5be0931204ea30c35fa2f2ea7a35b25bdaf4fe40e46/greenlet-3.1.1.tar.gz", hash = "sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467", size = 186022 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/25/90/5234a78dc0ef6496a6eb97b67a42a8e96742a56f7dc808cb954a85390448/greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563", size = 271235 }, - { url = "https://files.pythonhosted.org/packages/7c/16/cd631fa0ab7d06ef06387135b7549fdcc77d8d859ed770a0d28e47b20972/greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83", size = 637168 }, - { url = 
"https://files.pythonhosted.org/packages/2f/b1/aed39043a6fec33c284a2c9abd63ce191f4f1a07319340ffc04d2ed3256f/greenlet-3.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36b89d13c49216cadb828db8dfa6ce86bbbc476a82d3a6c397f0efae0525bdd0", size = 648826 }, - { url = "https://files.pythonhosted.org/packages/76/25/40e0112f7f3ebe54e8e8ed91b2b9f970805143efef16d043dfc15e70f44b/greenlet-3.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b6150a85e1b33b40b1464a3f9988dcc5251d6ed06842abff82e42632fac120", size = 644443 }, - { url = "https://files.pythonhosted.org/packages/fb/2f/3850b867a9af519794784a7eeed1dd5bc68ffbcc5b28cef703711025fd0a/greenlet-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93147c513fac16385d1036b7e5b102c7fbbdb163d556b791f0f11eada7ba65dc", size = 643295 }, - { url = "https://files.pythonhosted.org/packages/cf/69/79e4d63b9387b48939096e25115b8af7cd8a90397a304f92436bcb21f5b2/greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da7a9bff22ce038e19bf62c4dd1ec8391062878710ded0a845bcf47cc0200617", size = 599544 }, - { url = "https://files.pythonhosted.org/packages/46/1d/44dbcb0e6c323bd6f71b8c2f4233766a5faf4b8948873225d34a0b7efa71/greenlet-3.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b2795058c23988728eec1f36a4e5e4ebad22f8320c85f3587b539b9ac84128d7", size = 1125456 }, - { url = "https://files.pythonhosted.org/packages/e0/1d/a305dce121838d0278cee39d5bb268c657f10a5363ae4b726848f833f1bb/greenlet-3.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ed10eac5830befbdd0c32f83e8aa6288361597550ba669b04c48f0f9a2c843c6", size = 1149111 }, - { url = "https://files.pythonhosted.org/packages/96/28/d62835fb33fb5652f2e98d34c44ad1a0feacc8b1d3f1aecab035f51f267d/greenlet-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:77c386de38a60d1dfb8e55b8c1101d68c79dfdd25c7095d51fec2dd800892b80", size = 298392 }, - { url = "https://files.pythonhosted.org/packages/28/62/1c2665558618553c42922ed47a4e6d6527e2fa3516a8256c2f431c5d0441/greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70", size = 272479 }, - { url = "https://files.pythonhosted.org/packages/76/9d/421e2d5f07285b6e4e3a676b016ca781f63cfe4a0cd8eaecf3fd6f7a71ae/greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159", size = 640404 }, - { url = "https://files.pythonhosted.org/packages/e5/de/6e05f5c59262a584e502dd3d261bbdd2c97ab5416cc9c0b91ea38932a901/greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e", size = 652813 }, - { url = "https://files.pythonhosted.org/packages/49/93/d5f93c84241acdea15a8fd329362c2c71c79e1a507c3f142a5d67ea435ae/greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1", size = 648517 }, - { url = "https://files.pythonhosted.org/packages/15/85/72f77fc02d00470c86a5c982b8daafdf65d38aefbbe441cebff3bf7037fc/greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383", size = 647831 }, - { url = 
"https://files.pythonhosted.org/packages/f7/4b/1c9695aa24f808e156c8f4813f685d975ca73c000c2a5056c514c64980f6/greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a", size = 602413 }, - { url = "https://files.pythonhosted.org/packages/76/70/ad6e5b31ef330f03b12559d19fda2606a522d3849cde46b24f223d6d1619/greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511", size = 1129619 }, - { url = "https://files.pythonhosted.org/packages/f4/fb/201e1b932e584066e0f0658b538e73c459b34d44b4bd4034f682423bc801/greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395", size = 1155198 }, - { url = "https://files.pythonhosted.org/packages/12/da/b9ed5e310bb8b89661b80cbcd4db5a067903bbcd7fc854923f5ebb4144f0/greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39", size = 298930 }, - { url = "https://files.pythonhosted.org/packages/7d/ec/bad1ac26764d26aa1353216fcbfa4670050f66d445448aafa227f8b16e80/greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d", size = 274260 }, - { url = "https://files.pythonhosted.org/packages/66/d4/c8c04958870f482459ab5956c2942c4ec35cac7fe245527f1039837c17a9/greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79", size = 649064 }, - { url = "https://files.pythonhosted.org/packages/51/41/467b12a8c7c1303d20abcca145db2be4e6cd50a951fa30af48b6ec607581/greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa", size = 663420 }, - { url = "https://files.pythonhosted.org/packages/27/8f/2a93cd9b1e7107d5c7b3b7816eeadcac2ebcaf6d6513df9abaf0334777f6/greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441", size = 658035 }, - { url = "https://files.pythonhosted.org/packages/57/5c/7c6f50cb12be092e1dccb2599be5a942c3416dbcfb76efcf54b3f8be4d8d/greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36", size = 660105 }, - { url = "https://files.pythonhosted.org/packages/f1/66/033e58a50fd9ec9df00a8671c74f1f3a320564c6415a4ed82a1c651654ba/greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9", size = 613077 }, - { url = "https://files.pythonhosted.org/packages/19/c5/36384a06f748044d06bdd8776e231fadf92fc896bd12cb1c9f5a1bda9578/greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0", size = 1135975 }, - { url = "https://files.pythonhosted.org/packages/38/f9/c0a0eb61bdf808d23266ecf1d63309f0e1471f284300ce6dac0ae1231881/greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942", size = 1163955 }, - { url = "https://files.pythonhosted.org/packages/43/21/a5d9df1d21514883333fc86584c07c2b49ba7c602e670b174bd73cfc9c7f/greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01", size = 299655 }, - { url = "https://files.pythonhosted.org/packages/f3/57/0db4940cd7bb461365ca8d6fd53e68254c9dbbcc2b452e69d0d41f10a85e/greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1", size = 272990 }, - { url = "https://files.pythonhosted.org/packages/1c/ec/423d113c9f74e5e402e175b157203e9102feeb7088cee844d735b28ef963/greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff", size = 649175 }, - { url = "https://files.pythonhosted.org/packages/a9/46/ddbd2db9ff209186b7b7c621d1432e2f21714adc988703dbdd0e65155c77/greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a", size = 663425 }, - { url = "https://files.pythonhosted.org/packages/bc/f9/9c82d6b2b04aa37e38e74f0c429aece5eeb02bab6e3b98e7db89b23d94c6/greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e", size = 657736 }, - { url = "https://files.pythonhosted.org/packages/d9/42/b87bc2a81e3a62c3de2b0d550bf91a86939442b7ff85abb94eec3fc0e6aa/greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4", size = 660347 }, - { url = "https://files.pythonhosted.org/packages/37/fa/71599c3fd06336cdc3eac52e6871cfebab4d9d70674a9a9e7a482c318e99/greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e", size = 615583 }, - { url = "https://files.pythonhosted.org/packages/4e/96/e9ef85de031703ee7a4483489b40cf307f93c1824a02e903106f2ea315fe/greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1", size = 1133039 }, - { url = "https://files.pythonhosted.org/packages/87/76/b2b6362accd69f2d1889db61a18c94bc743e961e3cab344c2effaa4b4a25/greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c", size = 1160716 }, - { url = "https://files.pythonhosted.org/packages/1f/1b/54336d876186920e185066d8c3024ad55f21d7cc3683c856127ddb7b13ce/greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761", size = 299490 }, - { url = "https://files.pythonhosted.org/packages/5f/17/bea55bf36990e1638a2af5ba10c1640273ef20f627962cf97107f1e5d637/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011", size = 643731 }, - { url = "https://files.pythonhosted.org/packages/78/d2/aa3d2157f9ab742a08e0fd8f77d4699f37c22adfbfeb0c610a186b5f75e0/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13", size = 649304 }, - { url = "https://files.pythonhosted.org/packages/f1/8e/d0aeffe69e53ccff5a28fa86f07ad1d2d2d6537a9506229431a2a02e2f15/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475", size = 646537 }, - { url = 
"https://files.pythonhosted.org/packages/05/79/e15408220bbb989469c8871062c97c6c9136770657ba779711b90870d867/greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b", size = 642506 }, - { url = "https://files.pythonhosted.org/packages/18/87/470e01a940307796f1d25f8167b551a968540fbe0551c0ebb853cb527dd6/greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822", size = 602753 }, - { url = "https://files.pythonhosted.org/packages/e2/72/576815ba674eddc3c25028238f74d7b8068902b3968cbe456771b166455e/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01", size = 1122731 }, - { url = "https://files.pythonhosted.org/packages/ac/38/08cc303ddddc4b3d7c628c3039a61a3aae36c241ed01393d00c2fd663473/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6", size = 1142112 }, -] - [[package]] name = "h11" version = "0.14.0" @@ -2053,15 +1864,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/9f/5b5481d716670ed5fbd8d06dfa94b7108272b645da2f2406eb909cb6a450/libcst-1.5.0-cp313-cp313-win_amd64.whl", hash = "sha256:4d6acb0bdee1e55b44c6215c59755ec4693ac01e74bb1fde04c37358b378835d", size = 2029600 }, ] -[[package]] -name = "lockfile" -version = "0.12.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/17/47/72cb04a58a35ec495f96984dddb48232b551aafb95bde614605b754fe6f7/lockfile-0.12.2.tar.gz", hash = "sha256:6aed02de03cba24efabcd600b30540140634fc06cfa603822d508d5361e9f799", size = 20874 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/22/9460e311f340cb62d26a38c419b1381b8593b0bb6b5d1f056938b086d362/lockfile-0.12.2-py2.py3-none-any.whl", hash = "sha256:6c3cb24f344923d30b2785d5ad75182c8ea7ac1b6171b08657258ec7429d50fa", size = 13564 }, -] - [[package]] name = "logfire" version = "1.2.0" @@ -2439,20 +2241,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124", size = 53985 }, ] -[[package]] -name = "paramiko" -version = "3.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "bcrypt" }, - { name = "cryptography" }, - { name = "pynacl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1b/0f/c00296e36ff7485935b83d466c4f2cf5934b84b0ad14e81796e1d9d3609b/paramiko-3.5.0.tar.gz", hash = "sha256:ad11e540da4f55cedda52931f1a3f812a8238a7af7f62a60de538cd80bb28124", size = 1704305 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/66/14b2c030fcce69cba482d205c2d1462ca5c77303a263260dcb1192801c85/paramiko-3.5.0-py3-none-any.whl", hash = "sha256:1fedf06b085359051cd7d0d270cebe19e755a8a921cc2ddbfa647fb0cd7d68f9", size = 227143 }, -] - [[package]] name = "parso" version = "0.8.4" @@ -2512,8 +2300,12 @@ wheels = [ [[package]] name = "pocket" -version = "0.3.7" -source = { virtual = "archivebox/vendor/pocket" } +version = "0.3.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/57/b6/cd79a0e237e733e2f8a196f4e9f4d30d99c769b809c5fbbea9e34400655d/pocket-0.3.6.tar.gz", hash = "sha256:907bf16a19fae9c2080f799d979de4c8daa36d6d28e86ceb9fc17d6f0bdb89b9", size = 3749 } [[package]] name = "prompt-toolkit" @@ -2733,41 +2525,16 @@ wheels = [ [[package]] name = "pydantic-pkgr" version = "0.5.4" -source = { editable = "archivebox/vendor/pydantic-pkgr" } +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "platformdirs" }, { name = "pydantic" }, { name = "pydantic-core" }, { name = "typing-extensions" }, ] - -[package.optional-dependencies] -all = [ - { name = "ansible" }, - { name = "ansible-core" }, - { name = "ansible-runner" }, - { name = "pyinfra" }, -] -ansible = [ - { name = "ansible" }, - { name = "ansible-core" }, - { name = "ansible-runner" }, -] -pyinfra = [ - { name = "pyinfra" }, -] - -[package.metadata] -requires-dist = [ - { name = "ansible", marker = "extra == 'ansible'", specifier = ">=10.5.0" }, - { name = "ansible-core", marker = "extra == 'ansible'", specifier = ">=2.17.5" }, - { name = "ansible-runner", marker = "extra == 'ansible'", specifier = ">=2.4.0" }, - { name = "platformdirs", specifier = ">=4.3.6" }, - { name = "pydantic", specifier = ">=2.7.1" }, - { name = "pydantic-core", specifier = ">=2.18.2" }, - { name = "pydantic-pkgr", extras = ["pyinfra", "ansible"], marker = "extra == 'all'", editable = "archivebox/vendor/pydantic-pkgr" }, - { name = "pyinfra", marker = "extra == 'pyinfra'", specifier = ">=2.6.1" }, - { name = "typing-extensions", specifier = ">=4.11.0" }, +sdist = { url = "https://files.pythonhosted.org/packages/d2/18/3bf29e213c4a19d5b08e0fa1048c72f76c54565a208cced1fd4a60f989fc/pydantic_pkgr-0.5.4.tar.gz", hash = "sha256:e3487b46357b1e1b729363385590355cfac261b18ed207f59e9b613c5a8d45b2", size = 42408 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/97/9ec8d45e4af1a3af7d0ca78e12bcb1d74a446399034cb1514aab2bac056e/pydantic_pkgr-0.5.4-py3-none-any.whl", hash = "sha256:46ad1ad5954ee9c55b2c2f2c2be749a39992a89edde624454e63d8a7b550be8b", size = 45061 }, ] [[package]] @@ -2801,49 +2568,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f7/3f/01c8b82017c199075f8f788d0d906b9ffbbc5a47dc9918a945e13d5a2bda/pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a", size = 1205513 }, ] -[[package]] -name = "pyinfra" -version = "3.1.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "configparser" }, - { name = "distro" }, - { name = "gevent" }, - { name = "jinja2" }, - { name = "packaging" }, - { name = "paramiko" }, - { name = "python-dateutil" }, - { name = "pywinrm" }, - { name = "setuptools" }, - { name = "typeguard" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/12/1c/bb923dcd1ee29272e31986ef5f64e91b586a0c685efe82672f6cf468e96d/pyinfra-3.1.1.tar.gz", hash = "sha256:5209a05897597c8747511bb559809a64a84377ae77424d3869d46583f95f2f30", size = 198499 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/56/cf53e42877039d13c3e07d63a38ce28e2cc4dca167a2cdc5420f2766f95a/pyinfra-3.1.1-py2.py3-none-any.whl", hash = "sha256:c87c75fcc03197ce84cb078838e225669be5cc0c4d4e52e408a9e774a3d183f6", size = 255376 }, -] - -[[package]] -name = "pynacl" -version = "1.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi" }, -] 
-sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920 }, - { url = "https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722 }, - { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087 }, - { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678 }, - { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660 }, - { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824 }, - { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912 }, - { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624 }, - { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141 }, -] - [[package]] name = "pyopenssl" version = "24.2.1" @@ -2856,19 +2580,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/dd/e0aa7ebef5168c75b772eda64978c597a9129b46be17779054652a7999e4/pyOpenSSL-24.2.1-py3-none-any.whl", hash = "sha256:967d5719b12b243588573f39b0c677637145c7a1ffedcd495a487e58177fbb8d", size = 58390 }, ] -[[package]] -name = "pyspnego" -version = "0.11.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cryptography" }, - { name = "sspilib", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/46/f5/1f938a781742d18475ac43a101ec8a9499e1655da0984e08b59e20012c04/pyspnego-0.11.1.tar.gz", hash = 
"sha256:e92ed8b0a62765b9d6abbb86a48cf871228ddb97678598dc01c9c39a626823f6", size = 225697 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/c3/4dc3d1d029e14bf065f1df9e98e3e503e622de34706a06ab6c3731377e85/pyspnego-0.11.1-py3-none-any.whl", hash = "sha256:129a4294f2c4d681d5875240ef87accc6f1d921e8983737fb0b59642b397951e", size = 130456 }, -] - [[package]] name = "pytest" version = "8.3.3" @@ -2928,19 +2639,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3b/91/832fb3b3a1f62bd2ab4924f6be0c7736c9bc4f84d3b153b74efcf6d4e4a1/python_crontab-3.2.0-py3-none-any.whl", hash = "sha256:82cb9b6a312d41ff66fd3caf3eed7115c28c195bfb50711bc2b4b9592feb9fe5", size = 27351 }, ] -[[package]] -name = "python-daemon" -version = "3.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "lockfile" }, - { name = "setuptools" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/54/cd/d62884732e5d6ff6906234169d06338d53e37243c60cf73679c8942f9e42/python_daemon-3.1.0.tar.gz", hash = "sha256:fdb621d7e5f46e74b4de1ad6b0fff6e69cd91b4f219de1476190ebdd0f4781df", size = 61947 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/78/09ce91de8b31930c415d7439fa4f9d00d25af57135c16358c0b5b0ae0dea/python_daemon-3.1.0-py3-none-any.whl", hash = "sha256:a66b5896f0aed5807a25c6128268eb496488b1f9c6927c487710049ba16be32a", size = 30899 }, -] - [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -3011,20 +2709,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/11/c3/005fcca25ce078d2cc29fd559379817424e94885510568bc1bc53d7d5846/pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725", size = 508002 }, ] -[[package]] -name = "pywinrm" -version = "0.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "requests" }, - { name = "requests-ntlm" }, - { name = "xmltodict" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5a/2f/d835c342c4b11e28beaccef74982e7669986c84bf19654c39f53c8b8243c/pywinrm-0.5.0.tar.gz", hash = "sha256:5428eb1e494af7954546cd4ff15c9ef1a30a75e05b25a39fd606cef22201e9f1", size = 40875 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0c/45/4340320145c225387f40ce412de1b209d991c322032e4922cc0a9935fd31/pywinrm-0.5.0-py3-none-any.whl", hash = "sha256:c267046d281de613fc7c8a528cdd261564d9b99bdb7c2926221eff3263b700c8", size = 48182 }, -] - [[package]] name = "pyyaml" version = "6.0.2" @@ -3167,20 +2851,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, ] -[[package]] -name = "requests-ntlm" -version = "1.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cryptography" }, - { name = "pyspnego" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/15/74/5d4e1815107e9d78c44c3ad04740b00efd1189e5a9ec11e5275b60864e54/requests_ntlm-1.3.0.tar.gz", hash = "sha256:b29cc2462623dffdf9b88c43e180ccb735b4007228a542220e882c58ae56c668", size = 16112 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/5d/836b97537a390cf811b0488490c389c5a614f0a93acb23f347bd37a2d914/requests_ntlm-1.3.0-py3-none-any.whl", hash = "sha256:4c7534a7d0e482bb0928531d621be4b2c74ace437e88c5a357ceb7452d25a510", size = 6577 }, -] - [[package]] name = 
"requests-tracker" version = "0.3.3" @@ -3194,15 +2864,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/f5/d2fd9443c1839edf0c17216e9ab03201c16468e82e2968504fc738cd6917/requests_tracker-0.3.3-py3-none-any.whl", hash = "sha256:31d8924470ceea34be51743142c5248f1bf625d2ff95d1f0dccc2cfe14ecda0b", size = 58078 }, ] -[[package]] -name = "resolvelib" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ce/10/f699366ce577423cbc3df3280063099054c23df70856465080798c6ebad6/resolvelib-1.0.1.tar.gz", hash = "sha256:04ce76cbd63fded2078ce224785da6ecd42b9564b1390793f64ddecbe997b309", size = 21065 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/fc/e9ccf0521607bcd244aa0b3fbd574f71b65e9ce6a112c83af988bbbe2e23/resolvelib-1.0.1-py2.py3-none-any.whl", hash = "sha256:d2da45d1a8dfee81bdd591647783e340ef3bcb104b54c383f70d422ef5cc7dbf", size = 17194 }, -] - [[package]] name = "rich" version = "13.9.3" @@ -3446,26 +3107,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5d/a5/b2860373aa8de1e626b2bdfdd6df4355f0565b47e51f7d0c54fe70faf8fe/sqlparse-0.5.1-py3-none-any.whl", hash = "sha256:773dcbf9a5ab44a090f3441e2180efe2560220203dc2f8c0b0fa141e18b505e4", size = 44156 }, ] -[[package]] -name = "sspilib" -version = "0.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/36/48/8d634ac9aa5404b77f2d66b5a354751b7bbbf2be2947328fe895034cb750/sspilib-0.2.0.tar.gz", hash = "sha256:4d6cd4290ca82f40705efeb5e9107f7abcd5e647cb201a3d04371305938615b8", size = 55815 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/ac/b59283a2a0c91ef136f4979d711cd8dcd005b9f18b4a50ffaaa50e00f200/sspilib-0.2.0-cp310-cp310-win32.whl", hash = "sha256:e436fa09bcf353a364a74b3ef6910d936fa8cd1493f136e517a9a7e11b319c57", size = 487673 }, - { url = "https://files.pythonhosted.org/packages/c5/bc/84cb16b512902b972cfd89130918f01aabb8016814442ff6bd2cf89d6530/sspilib-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:850a17c98d2b8579b183ce37a8df97d050bc5b31ab13f5a6d9e39c9692fe3754", size = 565326 }, - { url = "https://files.pythonhosted.org/packages/c5/0d/d15fe0e5c87a51b7d693e889656816fd8d67995fbd072ab9852934e9ecf4/sspilib-0.2.0-cp310-cp310-win_arm64.whl", hash = "sha256:a4d788a53b8db6d1caafba36887d5ac2087e6b6be6f01eb48f8afea6b646dbb5", size = 473562 }, - { url = "https://files.pythonhosted.org/packages/70/16/c31487f432724813a27f30c1a63ec07217adf65572e33fe9c4dcfd47a1b3/sspilib-0.2.0-cp311-cp311-win32.whl", hash = "sha256:400d5922c2c2261009921157c4b43d868e84640ad86e4dc84c95b07e5cc38ac6", size = 485419 }, - { url = "https://files.pythonhosted.org/packages/15/e9/0cb63b7f1014eff9c1a5b83920a423080b10f29ddf0264fced6abbdbad28/sspilib-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:d3e7d19c16ba9189ef8687b591503db06cfb9c5eb32ab1ca3bb9ebc1a8a5f35c", size = 564816 }, - { url = "https://files.pythonhosted.org/packages/b9/d9/3b8295f652afe71c0cdfd731eb7d37cc13a8adbfeacd3d67606d486d79b2/sspilib-0.2.0-cp311-cp311-win_arm64.whl", hash = "sha256:f65c52ead8ce95eb78a79306fe4269ee572ef3e4dcc108d250d5933da2455ecc", size = 472529 }, - { url = "https://files.pythonhosted.org/packages/a9/82/07a49f00c0e7feff26f288b5f0747add197fc0db1ddddfab5fd5bdd94bdf/sspilib-0.2.0-cp312-cp312-win32.whl", hash = "sha256:bdf9a4f424add02951e1f01f47441d2e69a9910471e99c2c88660bd8e184d7f8", size = 487318 }, - { url = 
"https://files.pythonhosted.org/packages/38/54/949a9e9c07cd6efead79a7f78cc951cb5fa4f9f1fcb25b8520fd2adcdbe0/sspilib-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:40a97ca83e503a175d1dc9461836994e47e8b9bcf56cab81a2c22e27f1993079", size = 569220 }, - { url = "https://files.pythonhosted.org/packages/8f/52/c7a16472e9582474626f48ec79a821f66e5698cf5552baf923dfc636989e/sspilib-0.2.0-cp312-cp312-win_arm64.whl", hash = "sha256:8ffc09819a37005c66a580ff44f544775f9745d5ed1ceeb37df4e5ff128adf36", size = 471371 }, - { url = "https://files.pythonhosted.org/packages/bc/9c/8784d3afe27c2f68620ea60fa2b6347100694db35193ba42714bdf23f882/sspilib-0.2.0-cp313-cp313-win32.whl", hash = "sha256:b9044d6020aa88d512e7557694fe734a243801f9a6874e1c214451eebe493d92", size = 483600 }, - { url = "https://files.pythonhosted.org/packages/49/ad/40f898075c913c75060c17c9cc6d6b86e8f83b6f5e1e017627b07ff53fcd/sspilib-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:c39a698491f43618efca8776a40fb7201d08c415c507f899f0df5ada15abefaa", size = 563678 }, - { url = "https://files.pythonhosted.org/packages/dd/84/3232ee82e33e426cd9e2011111a3136e5715428f0331a6739930b530333a/sspilib-0.2.0-cp313-cp313-win_arm64.whl", hash = "sha256:863b7b214517b09367511c0ef931370f0386ed2c7c5613092bf9b106114c4a0e", size = 469030 }, -] - [[package]] name = "stack-data" version = "0.6.3" @@ -3562,18 +3203,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/6c/a53cc9a97c2da76d9cd83c03f377468599a28f2d4ad9fc71c3b99640e71e/txaio-23.1.1-py2.py3-none-any.whl", hash = "sha256:aaea42f8aad50e0ecfb976130ada140797e9dcb85fad2cf72b0f37f8cefcb490", size = 30512 }, ] -[[package]] -name = "typeguard" -version = "4.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/79/5a/91b7c8cfc2e96962442abc9d65c650436dd831910b4d7878980d6596fb98/typeguard-4.4.0.tar.gz", hash = "sha256:463bd8697a65a4aa576a63767c369b1ecfba8a5ba735edfe3223127b6ecfa28c", size = 74399 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/61/a3/00203767544b597a9e3c57b29a84967b3230f00bdd9aa6a52a73187043b4/typeguard-4.4.0-py3-none-any.whl", hash = "sha256:8ca34c14043f53b2caae7040549ba431770869bcd6287cfa8239db7ecb882b4a", size = 35736 }, -] - [[package]] name = "typeid-python" version = "0.3.1" @@ -3903,18 +3532,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/8b/5ba542fa83c90e09eac972fc9baca7a88e7e7ca4b221a89251954019308b/zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350", size = 9200 }, ] -[[package]] -name = "zope-event" -version = "5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "setuptools" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/46/c2/427f1867bb96555d1d34342f1dd97f8c420966ab564d58d18469a1db8736/zope.event-5.0.tar.gz", hash = "sha256:bac440d8d9891b4068e2b5a2c5e2c9765a9df762944bda6955f96bb9b91e67cd", size = 17350 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/42/f8dbc2b9ad59e927940325a22d6d3931d630c3644dae7e2369ef5d9ba230/zope.event-5.0-py3-none-any.whl", hash = "sha256:2832e95014f4db26c47a13fdaef84cef2f4df37e66b59d8f1f4a8f319a632c26", size = 6824 }, -] - [[package]] name = "zope-interface" version = "7.1.1" From dee4eb7992d4acb237cadf7e98b876b0ef463235 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 20:05:20 -0700 Subject: [PATCH 13/25] rename vendor dir to pkgs --- .github/workflows/test.yml | 
2 +- archivebox/.flake8 | 2 +- archivebox/__init__.py | 4 +- archivebox/pkgs/__init__.py | 39 +++++++++++++++++++ .../abx-plugin-archivedotorg/README.md | 0 .../abx_plugin_archivedotorg/__init__.py | 0 .../abx_plugin_archivedotorg/archive_org.py | 0 .../abx_plugin_archivedotorg/config.py | 0 .../abx-plugin-archivedotorg/pyproject.toml | 0 .../abx-plugin-chrome/README.md | 0 .../abx_plugin_chrome/__init__.py | 0 .../abx_plugin_chrome/binaries.py | 0 .../abx_plugin_chrome/config.py | 0 .../abx_plugin_chrome/dom.py | 0 .../abx_plugin_chrome/pdf.py | 0 .../abx_plugin_chrome/screenshot.py | 0 .../abx-plugin-chrome/pyproject.toml | 0 .../abx-plugin-curl/README.md | 0 .../abx_plugin_curl/__init__.py | 0 .../abx_plugin_curl/binaries.py | 0 .../abx-plugin-curl/abx_plugin_curl/config.py | 0 .../abx_plugin_curl/headers.py | 0 .../abx-plugin-curl/pyproject.toml | 0 .../abx-plugin-default-binproviders/README.md | 0 .../abx_plugin_default_binproviders.py | 0 .../pyproject.toml | 0 .../abx-plugin-favicon/README.md | 0 .../abx_plugin_favicon/__init__.py | 0 .../abx_plugin_favicon/config.py | 0 .../abx_plugin_favicon/favicon.py | 0 .../abx-plugin-favicon/pyproject.toml | 0 .../{vendor => pkgs}/abx-plugin-git/README.md | 0 .../abx-plugin-git/abx_plugin_git/__init__.py | 0 .../abx-plugin-git/abx_plugin_git/binaries.py | 0 .../abx-plugin-git/abx_plugin_git/config.py | 0 .../abx_plugin_git/extractors.py | 0 .../abx-plugin-git/abx_plugin_git/git.py | 0 .../abx-plugin-git/pyproject.toml | 0 .../abx-plugin-htmltotext/README.md | 0 .../abx_plugin_htmltotext/__init__.py | 0 .../abx_plugin_htmltotext/config.py | 0 .../abx_plugin_htmltotext/htmltotext.py | 0 .../abx-plugin-htmltotext/pyproject.toml | 0 .../abx-plugin-ldap-auth/README.md | 0 .../abx_plugin_ldap_auth/__init__.py | 0 .../abx_plugin_ldap_auth/binaries.py | 0 .../abx_plugin_ldap_auth/config.py | 0 .../abx-plugin-ldap-auth/pyproject.toml | 0 .../abx-plugin-mercury/README.md | 0 .../abx_plugin_mercury/__init__.py | 0 .../abx_plugin_mercury/binaries.py | 0 .../abx_plugin_mercury/config.py | 0 .../abx_plugin_mercury/extractors.py | 0 .../abx_plugin_mercury/mercury.py | 0 .../abx-plugin-mercury/pyproject.toml | 0 .../{vendor => pkgs}/abx-plugin-npm/README.md | 0 .../abx-plugin-npm/abx_plugin_npm/__init__.py | 0 .../abx-plugin-npm/abx_plugin_npm/binaries.py | 0 .../abx_plugin_npm/binproviders.py | 0 .../abx-plugin-npm/abx_plugin_npm/config.py | 0 .../abx-plugin-npm/pyproject.toml | 0 .../{vendor => pkgs}/abx-plugin-pip/README.md | 0 .../abx_plugin_pip/.plugin_order | 0 .../abx-plugin-pip/abx_plugin_pip/__init__.py | 0 .../abx-plugin-pip/abx_plugin_pip/binaries.py | 0 .../abx_plugin_pip/binproviders.py | 0 .../abx-plugin-pip/abx_plugin_pip/config.py | 0 .../abx-plugin-pip/pyproject.toml | 0 .../abx-plugin-playwright/README.md | 0 .../abx_plugin_playwright/__init__.py | 0 .../abx_plugin_playwright/binaries.py | 0 .../abx_plugin_playwright/binproviders.py | 0 .../abx_plugin_playwright/config.py | 0 .../abx-plugin-playwright/pyproject.toml | 0 .../abx-plugin-pocket/README.md | 0 .../abx_plugin_pocket/__init__.py | 0 .../abx_plugin_pocket/config.py | 0 .../abx-plugin-pocket/pyproject.toml | 0 .../abx-plugin-puppeteer/README.md | 0 .../abx_plugin_puppeteer/__init__.py | 0 .../abx_plugin_puppeteer/binaries.py | 0 .../abx_plugin_puppeteer/binproviders.py | 0 .../abx_plugin_puppeteer/config.py | 0 .../abx-plugin-puppeteer/pyproject.toml | 0 .../abx-plugin-readability/README.md | 0 .../abx_plugin_readability/__init__.py | 0 .../abx_plugin_readability/binaries.py | 0 
.../abx_plugin_readability/config.py | 0 .../abx_plugin_readability/extractors.py | 0 .../abx_plugin_readability/readability.py | 0 .../abx-plugin-readability/pyproject.toml | 0 .../abx-plugin-readwise/README.md | 0 .../abx_plugin_readwise.py | 0 .../abx-plugin-readwise/pyproject.toml | 0 .../abx-plugin-ripgrep-search/README.md | 0 .../abx_plugin_ripgrep_search/__init__.py | 0 .../abx_plugin_ripgrep_search/binaries.py | 0 .../abx_plugin_ripgrep_search/config.py | 0 .../searchbackend.py | 0 .../abx-plugin-ripgrep-search/pyproject.toml | 0 .../abx-plugin-singlefile/README.md | 0 .../abx_plugin_singlefile/__init__.py | 0 .../abx_plugin_singlefile/binaries.py | 0 .../abx_plugin_singlefile/config.py | 0 .../abx_plugin_singlefile/extractors.py | 0 .../abx_plugin_singlefile/models.py | 0 .../abx_plugin_singlefile/singlefile.py | 0 .../abx-plugin-singlefile/pyproject.toml | 0 .../abx-plugin-sonic-search/README.md | 0 .../abx_plugin_sonic_search/__init__.py | 0 .../abx_plugin_sonic_search/binaries.py | 0 .../abx_plugin_sonic_search/config.py | 0 .../abx_plugin_sonic_search/searchbackend.py | 0 .../abx-plugin-sonic-search/pyproject.toml | 0 .../abx-plugin-sqlitefts-search/README.md | 0 .../abx_plugin_sqlitefts_search/__init__.py | 0 .../abx_plugin_sqlitefts_search/config.py | 0 .../searchbackend.py | 0 .../pyproject.toml | 0 .../abx-plugin-title/README.md | 0 .../abx_plugin_title/__init__.py | 0 .../abx_plugin_title/extractor.py | 0 .../abx-plugin-title/pyproject.toml | 0 .../abx-plugin-wget/README.md | 0 .../abx_plugin_wget/__init__.py | 0 .../abx_plugin_wget/binaries.py | 0 .../abx-plugin-wget/abx_plugin_wget/config.py | 0 .../abx_plugin_wget/extractors.py | 0 .../abx-plugin-wget/abx_plugin_wget/wget.py | 0 .../abx_plugin_wget/wget_util.py | 0 .../abx-plugin-wget/pyproject.toml | 0 .../abx-plugin-ytdlp/README.md | 0 .../abx_plugin_ytdlp/__init__.py | 0 .../abx_plugin_ytdlp/binaries.py | 0 .../abx_plugin_ytdlp/config.py | 0 .../abx_plugin_ytdlp/media.py | 0 .../abx-plugin-ytdlp/pyproject.toml | 0 .../abx-spec-archivebox/README.md | 0 .../abx_spec_archivebox/__init__.py | 0 .../abx_spec_archivebox/effects.py | 0 .../abx_spec_archivebox/events.py | 0 .../abx_spec_archivebox/reads.py | 0 .../abx_spec_archivebox/states.py | 0 .../abx_spec_archivebox/writes.py | 0 .../abx-spec-archivebox/pyproject.toml | 0 .../abx-spec-config/README.md | 0 .../abx_spec_config/__init__.py | 0 .../abx_spec_config/base_configset.py | 0 .../abx_spec_config/toml_util.py | 0 .../abx-spec-config/pyproject.toml | 0 .../abx-spec-django/README.md | 0 .../abx-spec-django/abx_spec_django.py | 0 .../abx-spec-django/pyproject.toml | 0 .../abx-spec-extractor/README.md | 0 .../abx-spec-extractor/abx_spec_extractor.py | 0 .../abx-spec-extractor/pyproject.toml | 0 .../abx-spec-pydantic-pkgr/README.md | 0 .../abx_spec_pydantic_pkgr.py | 0 .../abx-spec-pydantic-pkgr/pyproject.toml | 0 .../abx-spec-searchbackend/README.md | 0 .../abx_spec_searchbackend.py | 0 .../abx-spec-searchbackend/pyproject.toml | 0 archivebox/{vendor => pkgs}/abx/README.md | 0 archivebox/{vendor => pkgs}/abx/abx.py | 1 + .../{vendor => pkgs}/abx/pyproject.toml | 0 archivebox/vendor/__init__.py | 39 ------------------- archivebox/vendor/requirements.txt | 8 ---- pyproject.toml | 6 +-- 168 files changed, 47 insertions(+), 54 deletions(-) create mode 100644 archivebox/pkgs/__init__.py rename archivebox/{vendor => pkgs}/abx-plugin-archivedotorg/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py (100%) rename 
archivebox/{vendor => pkgs}/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-archivedotorg/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-chrome/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-chrome/abx_plugin_chrome/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-chrome/abx_plugin_chrome/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-chrome/abx_plugin_chrome/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-chrome/abx_plugin_chrome/dom.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-chrome/abx_plugin_chrome/pdf.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-chrome/abx_plugin_chrome/screenshot.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-chrome/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-curl/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-curl/abx_plugin_curl/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-curl/abx_plugin_curl/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-curl/abx_plugin_curl/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-curl/abx_plugin_curl/headers.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-curl/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-default-binproviders/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-default-binproviders/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-favicon/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-favicon/abx_plugin_favicon/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-favicon/abx_plugin_favicon/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-favicon/abx_plugin_favicon/favicon.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-favicon/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-git/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-git/abx_plugin_git/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-git/abx_plugin_git/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-git/abx_plugin_git/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-git/abx_plugin_git/extractors.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-git/abx_plugin_git/git.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-git/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-htmltotext/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-htmltotext/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ldap-auth/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py (100%) rename archivebox/{vendor => 
pkgs}/abx-plugin-ldap-auth/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-mercury/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-mercury/abx_plugin_mercury/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-mercury/abx_plugin_mercury/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-mercury/abx_plugin_mercury/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-mercury/abx_plugin_mercury/extractors.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-mercury/abx_plugin_mercury/mercury.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-mercury/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-npm/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-npm/abx_plugin_npm/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-npm/abx_plugin_npm/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-npm/abx_plugin_npm/binproviders.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-npm/abx_plugin_npm/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-npm/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pip/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pip/abx_plugin_pip/.plugin_order (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pip/abx_plugin_pip/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pip/abx_plugin_pip/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pip/abx_plugin_pip/binproviders.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pip/abx_plugin_pip/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pip/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-playwright/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-playwright/abx_plugin_playwright/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-playwright/abx_plugin_playwright/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-playwright/abx_plugin_playwright/binproviders.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-playwright/abx_plugin_playwright/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-playwright/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pocket/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pocket/abx_plugin_pocket/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pocket/abx_plugin_pocket/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-pocket/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-puppeteer/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-puppeteer/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readability/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readability/abx_plugin_readability/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readability/abx_plugin_readability/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readability/abx_plugin_readability/config.py (100%) rename archivebox/{vendor => 
pkgs}/abx-plugin-readability/abx_plugin_readability/extractors.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readability/abx_plugin_readability/readability.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readability/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readwise/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readwise/abx_plugin_readwise.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-readwise/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ripgrep-search/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ripgrep-search/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-singlefile/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-singlefile/abx_plugin_singlefile/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-singlefile/abx_plugin_singlefile/models.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-singlefile/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sonic-search/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sonic-search/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sqlitefts-search/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-sqlitefts-search/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-title/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-title/abx_plugin_title/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-title/abx_plugin_title/extractor.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-title/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-wget/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-wget/abx_plugin_wget/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-wget/abx_plugin_wget/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-wget/abx_plugin_wget/config.py (100%) rename archivebox/{vendor 
=> pkgs}/abx-plugin-wget/abx_plugin_wget/extractors.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-wget/abx_plugin_wget/wget.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-wget/abx_plugin_wget/wget_util.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-wget/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ytdlp/README.md (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py (100%) rename archivebox/{vendor => pkgs}/abx-plugin-ytdlp/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-spec-archivebox/README.md (100%) rename archivebox/{vendor => pkgs}/abx-spec-archivebox/abx_spec_archivebox/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-archivebox/abx_spec_archivebox/effects.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-archivebox/abx_spec_archivebox/events.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-archivebox/abx_spec_archivebox/reads.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-archivebox/abx_spec_archivebox/states.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-archivebox/abx_spec_archivebox/writes.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-archivebox/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-spec-config/README.md (100%) rename archivebox/{vendor => pkgs}/abx-spec-config/abx_spec_config/__init__.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-config/abx_spec_config/base_configset.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-config/abx_spec_config/toml_util.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-config/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-spec-django/README.md (100%) rename archivebox/{vendor => pkgs}/abx-spec-django/abx_spec_django.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-django/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-spec-extractor/README.md (100%) rename archivebox/{vendor => pkgs}/abx-spec-extractor/abx_spec_extractor.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-extractor/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-spec-pydantic-pkgr/README.md (100%) rename archivebox/{vendor => pkgs}/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-pydantic-pkgr/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx-spec-searchbackend/README.md (100%) rename archivebox/{vendor => pkgs}/abx-spec-searchbackend/abx_spec_searchbackend.py (100%) rename archivebox/{vendor => pkgs}/abx-spec-searchbackend/pyproject.toml (100%) rename archivebox/{vendor => pkgs}/abx/README.md (100%) rename archivebox/{vendor => pkgs}/abx/abx.py (99%) rename archivebox/{vendor => pkgs}/abx/pyproject.toml (100%) delete mode 100644 archivebox/vendor/__init__.py delete mode 100644 archivebox/vendor/requirements.txt diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 79cc28e7..b1153211 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -102,7 +102,7 @@ jobs: # TODO: remove this exception for windows once we get tests passing on that platform if: ${{ !contains(matrix.os, 'windows') }} run: | - python -m pytest -s --basetemp=tests/out --ignore=archivebox/vendor --ignore=deb_dist --ignore=pip_dist 
--ignore=brew_dist
+        python -m pytest -s --basetemp=tests/out --ignore=archivebox/pkgs
   docker_tests:
     runs-on: ubuntu-latest
diff --git a/archivebox/.flake8 b/archivebox/.flake8
index 01af646d..bb7176bd 100644
--- a/archivebox/.flake8
+++ b/archivebox/.flake8
@@ -3,4 +3,4 @@ ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E
 select = F,E9,W
 max-line-length = 130
 max-complexity = 10
-exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv
+exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv,data,data*
diff --git a/archivebox/__init__.py b/archivebox/__init__.py
index 24863926..fd32778c 100755
--- a/archivebox/__init__.py
+++ b/archivebox/__init__.py
@@ -47,8 +47,8 @@ from .monkey_patches import * # noqa
 # print('LOADING VENDORED LIBRARIES')
-from .vendor import load_vendored_libs # noqa
-load_vendored_libs()
+from .pkgs import load_vendored_pkgs # noqa
+load_vendored_pkgs()
 # print('DONE LOADING VENDORED LIBRARIES')
 # Load ABX Plugin Specifications + Default Implementations
diff --git a/archivebox/pkgs/__init__.py b/archivebox/pkgs/__init__.py
new file mode 100644
index 00000000..c5f4cc82
--- /dev/null
+++ b/archivebox/pkgs/__init__.py
@@ -0,0 +1,39 @@
+import sys
+import importlib
+from pathlib import Path
+
+PKGS_DIR = Path(__file__).parent
+
+VENDORED_PKGS = [
+    'abx',
+    # 'pydantic-pkgr',
+]
+
+# scan ./pkgs and add all dirs present to list of available VENDORED_PKGS
+for subdir in reversed(sorted(PKGS_DIR.iterdir())):
+    if subdir.is_dir() and subdir.name not in VENDORED_PKGS and not subdir.name.startswith('_'):
+        VENDORED_PKGS.append(subdir.name)
+
+
+def load_vendored_pkgs():
+    """Add archivebox/pkgs to sys.path and import all vendored libraries present within"""
+    if str(PKGS_DIR) not in sys.path:
+        sys.path.append(str(PKGS_DIR))
+
+    for pkg_name in VENDORED_PKGS:
+        pkg_dir = PKGS_DIR / pkg_name
+        assert pkg_dir.is_dir(), f'Required vendored pkg {pkg_name} could not be found in {pkg_dir}'
+
+        try:
+            lib = importlib.import_module(pkg_name)
+            # print(f"Successfully imported lib from environment {pkg_name}")
+        except ImportError:
+            sys.path.append(str(pkg_dir))
+            try:
+                lib = importlib.import_module(pkg_name)
+                # print(f"Successfully imported lib from vendored fallback {pkg_name}: {inspect.getfile(lib)}")
+            except ImportError as e:
+                print(f"Failed to import lib from environment or vendored fallback {pkg_name}: {e}", file=sys.stderr)
+                sys.exit(1)
+
+
diff --git a/archivebox/vendor/abx-plugin-archivedotorg/README.md b/archivebox/pkgs/abx-plugin-archivedotorg/README.md
similarity index 100%
rename from archivebox/vendor/abx-plugin-archivedotorg/README.md
rename to archivebox/pkgs/abx-plugin-archivedotorg/README.md
diff --git a/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py
similarity index 100%
rename from archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py
rename to archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py
diff --git a/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py
similarity index 100%
rename from archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py
rename to archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py
diff --git
a/archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py rename to archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py diff --git a/archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml b/archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-archivedotorg/pyproject.toml rename to archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-chrome/README.md b/archivebox/pkgs/abx-plugin-chrome/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-chrome/README.md rename to archivebox/pkgs/abx-plugin-chrome/README.md diff --git a/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/__init__.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py diff --git a/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/binaries.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/binaries.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py diff --git a/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/config.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/config.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py diff --git a/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/dom.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py similarity index 100% rename from archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/dom.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py diff --git a/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/pdf.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py similarity index 100% rename from archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/pdf.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py diff --git a/archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/screenshot.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py similarity index 100% rename from archivebox/vendor/abx-plugin-chrome/abx_plugin_chrome/screenshot.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py diff --git a/archivebox/vendor/abx-plugin-chrome/pyproject.toml b/archivebox/pkgs/abx-plugin-chrome/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-chrome/pyproject.toml rename to archivebox/pkgs/abx-plugin-chrome/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-curl/README.md b/archivebox/pkgs/abx-plugin-curl/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-curl/README.md rename to archivebox/pkgs/abx-plugin-curl/README.md diff --git a/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-curl/abx_plugin_curl/__init__.py rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py diff 
--git a/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/binaries.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-curl/abx_plugin_curl/binaries.py rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py diff --git a/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/config.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-curl/abx_plugin_curl/config.py rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py diff --git a/archivebox/vendor/abx-plugin-curl/abx_plugin_curl/headers.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py similarity index 100% rename from archivebox/vendor/abx-plugin-curl/abx_plugin_curl/headers.py rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py diff --git a/archivebox/vendor/abx-plugin-curl/pyproject.toml b/archivebox/pkgs/abx-plugin-curl/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-curl/pyproject.toml rename to archivebox/pkgs/abx-plugin-curl/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-default-binproviders/README.md b/archivebox/pkgs/abx-plugin-default-binproviders/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-default-binproviders/README.md rename to archivebox/pkgs/abx-plugin-default-binproviders/README.md diff --git a/archivebox/vendor/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py b/archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py similarity index 100% rename from archivebox/vendor/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py rename to archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py diff --git a/archivebox/vendor/abx-plugin-default-binproviders/pyproject.toml b/archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-default-binproviders/pyproject.toml rename to archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-favicon/README.md b/archivebox/pkgs/abx-plugin-favicon/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-favicon/README.md rename to archivebox/pkgs/abx-plugin-favicon/README.md diff --git a/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/__init__.py rename to archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py diff --git a/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/config.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/config.py rename to archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py diff --git a/archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/favicon.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py similarity index 100% rename from archivebox/vendor/abx-plugin-favicon/abx_plugin_favicon/favicon.py rename to archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py diff --git a/archivebox/vendor/abx-plugin-favicon/pyproject.toml b/archivebox/pkgs/abx-plugin-favicon/pyproject.toml similarity index 100% rename from 
archivebox/vendor/abx-plugin-favicon/pyproject.toml rename to archivebox/pkgs/abx-plugin-favicon/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-git/README.md b/archivebox/pkgs/abx-plugin-git/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-git/README.md rename to archivebox/pkgs/abx-plugin-git/README.md diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-git/abx_plugin_git/__init__.py rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/binaries.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-git/abx_plugin_git/binaries.py rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/config.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-git/abx_plugin_git/config.py rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py similarity index 100% rename from archivebox/vendor/abx-plugin-git/abx_plugin_git/extractors.py rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py diff --git a/archivebox/vendor/abx-plugin-git/abx_plugin_git/git.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py similarity index 100% rename from archivebox/vendor/abx-plugin-git/abx_plugin_git/git.py rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py diff --git a/archivebox/vendor/abx-plugin-git/pyproject.toml b/archivebox/pkgs/abx-plugin-git/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-git/pyproject.toml rename to archivebox/pkgs/abx-plugin-git/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-htmltotext/README.md b/archivebox/pkgs/abx-plugin-htmltotext/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-htmltotext/README.md rename to archivebox/pkgs/abx-plugin-htmltotext/README.md diff --git a/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py rename to archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py diff --git a/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py rename to archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py diff --git a/archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py similarity index 100% rename from archivebox/vendor/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py rename to archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py diff --git a/archivebox/vendor/abx-plugin-htmltotext/pyproject.toml b/archivebox/pkgs/abx-plugin-htmltotext/pyproject.toml similarity index 100% rename from 
archivebox/vendor/abx-plugin-htmltotext/pyproject.toml rename to archivebox/pkgs/abx-plugin-htmltotext/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-ldap-auth/README.md b/archivebox/pkgs/abx-plugin-ldap-auth/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-ldap-auth/README.md rename to archivebox/pkgs/abx-plugin-ldap-auth/README.md diff --git a/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py rename to archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py diff --git a/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py rename to archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py diff --git a/archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py rename to archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py diff --git a/archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml b/archivebox/pkgs/abx-plugin-ldap-auth/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-ldap-auth/pyproject.toml rename to archivebox/pkgs/abx-plugin-ldap-auth/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-mercury/README.md b/archivebox/pkgs/abx-plugin-mercury/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-mercury/README.md rename to archivebox/pkgs/abx-plugin-mercury/README.md diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/__init__.py rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/__init__.py diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/binaries.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/binaries.py rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/config.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/config.py rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/config.py diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py similarity index 100% rename from archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/extractors.py rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py diff --git a/archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/mercury.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py similarity index 100% rename from archivebox/vendor/abx-plugin-mercury/abx_plugin_mercury/mercury.py rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py diff --git 
a/archivebox/vendor/abx-plugin-mercury/pyproject.toml b/archivebox/pkgs/abx-plugin-mercury/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-mercury/pyproject.toml rename to archivebox/pkgs/abx-plugin-mercury/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-npm/README.md b/archivebox/pkgs/abx-plugin-npm/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-npm/README.md rename to archivebox/pkgs/abx-plugin-npm/README.md diff --git a/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/__init__.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-npm/abx_plugin_npm/__init__.py rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/__init__.py diff --git a/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binaries.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binaries.py rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py diff --git a/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binproviders.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py similarity index 100% rename from archivebox/vendor/abx-plugin-npm/abx_plugin_npm/binproviders.py rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py diff --git a/archivebox/vendor/abx-plugin-npm/abx_plugin_npm/config.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-npm/abx_plugin_npm/config.py rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/config.py diff --git a/archivebox/vendor/abx-plugin-npm/pyproject.toml b/archivebox/pkgs/abx-plugin-npm/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-npm/pyproject.toml rename to archivebox/pkgs/abx-plugin-npm/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-pip/README.md b/archivebox/pkgs/abx-plugin-pip/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-pip/README.md rename to archivebox/pkgs/abx-plugin-pip/README.md diff --git a/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/.plugin_order b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order similarity index 100% rename from archivebox/vendor/abx-plugin-pip/abx_plugin_pip/.plugin_order rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order diff --git a/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/__init__.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-pip/abx_plugin_pip/__init__.py rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py diff --git a/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binaries.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binaries.py rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py diff --git a/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binproviders.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py similarity index 100% rename from archivebox/vendor/abx-plugin-pip/abx_plugin_pip/binproviders.py rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py diff --git a/archivebox/vendor/abx-plugin-pip/abx_plugin_pip/config.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/config.py similarity index 100% rename from 
archivebox/vendor/abx-plugin-pip/abx_plugin_pip/config.py rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/config.py diff --git a/archivebox/vendor/abx-plugin-pip/pyproject.toml b/archivebox/pkgs/abx-plugin-pip/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-pip/pyproject.toml rename to archivebox/pkgs/abx-plugin-pip/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-playwright/README.md b/archivebox/pkgs/abx-plugin-playwright/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-playwright/README.md rename to archivebox/pkgs/abx-plugin-playwright/README.md diff --git a/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/__init__.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/__init__.py rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/__init__.py diff --git a/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binaries.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binaries.py rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py diff --git a/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binproviders.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py similarity index 100% rename from archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/binproviders.py rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py diff --git a/archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/config.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-playwright/abx_plugin_playwright/config.py rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/config.py diff --git a/archivebox/vendor/abx-plugin-playwright/pyproject.toml b/archivebox/pkgs/abx-plugin-playwright/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-playwright/pyproject.toml rename to archivebox/pkgs/abx-plugin-playwright/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-pocket/README.md b/archivebox/pkgs/abx-plugin-pocket/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-pocket/README.md rename to archivebox/pkgs/abx-plugin-pocket/README.md diff --git a/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/__init__.py rename to archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/__init__.py diff --git a/archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/config.py b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-pocket/abx_plugin_pocket/config.py rename to archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py diff --git a/archivebox/vendor/abx-plugin-pocket/pyproject.toml b/archivebox/pkgs/abx-plugin-pocket/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-pocket/pyproject.toml rename to archivebox/pkgs/abx-plugin-pocket/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-puppeteer/README.md b/archivebox/pkgs/abx-plugin-puppeteer/README.md 
similarity index 100% rename from archivebox/vendor/abx-plugin-puppeteer/README.md rename to archivebox/pkgs/abx-plugin-puppeteer/README.md diff --git a/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py diff --git a/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py diff --git a/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py similarity index 100% rename from archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py diff --git a/archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py diff --git a/archivebox/vendor/abx-plugin-puppeteer/pyproject.toml b/archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-puppeteer/pyproject.toml rename to archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-readability/README.md b/archivebox/pkgs/abx-plugin-readability/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-readability/README.md rename to archivebox/pkgs/abx-plugin-readability/README.md diff --git a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-readability/abx_plugin_readability/__init__.py rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/__init__.py diff --git a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/binaries.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-readability/abx_plugin_readability/binaries.py rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py diff --git a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/config.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-readability/abx_plugin_readability/config.py rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/config.py diff --git a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py similarity index 100% rename from archivebox/vendor/abx-plugin-readability/abx_plugin_readability/extractors.py rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py diff --git 
a/archivebox/vendor/abx-plugin-readability/abx_plugin_readability/readability.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py similarity index 100% rename from archivebox/vendor/abx-plugin-readability/abx_plugin_readability/readability.py rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py diff --git a/archivebox/vendor/abx-plugin-readability/pyproject.toml b/archivebox/pkgs/abx-plugin-readability/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-readability/pyproject.toml rename to archivebox/pkgs/abx-plugin-readability/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-readwise/README.md b/archivebox/pkgs/abx-plugin-readwise/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-readwise/README.md rename to archivebox/pkgs/abx-plugin-readwise/README.md diff --git a/archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py b/archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py similarity index 100% rename from archivebox/vendor/abx-plugin-readwise/abx_plugin_readwise.py rename to archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py diff --git a/archivebox/vendor/abx-plugin-readwise/pyproject.toml b/archivebox/pkgs/abx-plugin-readwise/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-readwise/pyproject.toml rename to archivebox/pkgs/abx-plugin-readwise/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/README.md b/archivebox/pkgs/abx-plugin-ripgrep-search/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-ripgrep-search/README.md rename to archivebox/pkgs/abx-plugin-ripgrep-search/README.md diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py similarity index 100% rename from archivebox/vendor/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py diff --git a/archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml b/archivebox/pkgs/abx-plugin-ripgrep-search/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-ripgrep-search/pyproject.toml rename to 
archivebox/pkgs/abx-plugin-ripgrep-search/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-singlefile/README.md b/archivebox/pkgs/abx-plugin-singlefile/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-singlefile/README.md rename to archivebox/pkgs/abx-plugin-singlefile/README.md diff --git a/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py diff --git a/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py diff --git a/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/config.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/config.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/config.py diff --git a/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py similarity index 100% rename from archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py diff --git a/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/models.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py similarity index 100% rename from archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/models.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py diff --git a/archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py similarity index 100% rename from archivebox/vendor/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py diff --git a/archivebox/vendor/abx-plugin-singlefile/pyproject.toml b/archivebox/pkgs/abx-plugin-singlefile/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-singlefile/pyproject.toml rename to archivebox/pkgs/abx-plugin-singlefile/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-sonic-search/README.md b/archivebox/pkgs/abx-plugin-sonic-search/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-sonic-search/README.md rename to archivebox/pkgs/abx-plugin-sonic-search/README.md diff --git a/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py diff --git a/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py similarity index 100% rename from 
archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py diff --git a/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py diff --git a/archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py similarity index 100% rename from archivebox/vendor/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py diff --git a/archivebox/vendor/abx-plugin-sonic-search/pyproject.toml b/archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-sonic-search/pyproject.toml rename to archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/README.md b/archivebox/pkgs/abx-plugin-sqlitefts-search/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-sqlitefts-search/README.md rename to archivebox/pkgs/abx-plugin-sqlitefts-search/README.md diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py rename to archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py rename to archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py similarity index 100% rename from archivebox/vendor/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py rename to archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py diff --git a/archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml b/archivebox/pkgs/abx-plugin-sqlitefts-search/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-sqlitefts-search/pyproject.toml rename to archivebox/pkgs/abx-plugin-sqlitefts-search/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-title/README.md b/archivebox/pkgs/abx-plugin-title/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-title/README.md rename to archivebox/pkgs/abx-plugin-title/README.md diff --git a/archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-title/abx_plugin_title/__init__.py rename to archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py diff --git 
a/archivebox/vendor/abx-plugin-title/abx_plugin_title/extractor.py b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py similarity index 100% rename from archivebox/vendor/abx-plugin-title/abx_plugin_title/extractor.py rename to archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py diff --git a/archivebox/vendor/abx-plugin-title/pyproject.toml b/archivebox/pkgs/abx-plugin-title/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-title/pyproject.toml rename to archivebox/pkgs/abx-plugin-title/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-wget/README.md b/archivebox/pkgs/abx-plugin-wget/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-wget/README.md rename to archivebox/pkgs/abx-plugin-wget/README.md diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-wget/abx_plugin_wget/__init__.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/__init__.py diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/binaries.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py similarity index 100% rename from archivebox/vendor/abx-plugin-wget/abx_plugin_wget/binaries.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/config.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-wget/abx_plugin_wget/config.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/config.py diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py similarity index 100% rename from archivebox/vendor/abx-plugin-wget/abx_plugin_wget/extractors.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py similarity index 100% rename from archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py diff --git a/archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget_util.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget_util.py similarity index 100% rename from archivebox/vendor/abx-plugin-wget/abx_plugin_wget/wget_util.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget_util.py diff --git a/archivebox/vendor/abx-plugin-wget/pyproject.toml b/archivebox/pkgs/abx-plugin-wget/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-wget/pyproject.toml rename to archivebox/pkgs/abx-plugin-wget/pyproject.toml diff --git a/archivebox/vendor/abx-plugin-ytdlp/README.md b/archivebox/pkgs/abx-plugin-ytdlp/README.md similarity index 100% rename from archivebox/vendor/abx-plugin-ytdlp/README.md rename to archivebox/pkgs/abx-plugin-ytdlp/README.md diff --git a/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py similarity index 100% rename from archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py diff --git a/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py similarity index 
100% rename from archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py diff --git a/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py similarity index 100% rename from archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py diff --git a/archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py similarity index 100% rename from archivebox/vendor/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py diff --git a/archivebox/vendor/abx-plugin-ytdlp/pyproject.toml b/archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-plugin-ytdlp/pyproject.toml rename to archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml diff --git a/archivebox/vendor/abx-spec-archivebox/README.md b/archivebox/pkgs/abx-spec-archivebox/README.md similarity index 100% rename from archivebox/vendor/abx-spec-archivebox/README.md rename to archivebox/pkgs/abx-spec-archivebox/README.md diff --git a/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py similarity index 100% rename from archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/__init__.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py diff --git a/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/effects.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/effects.py similarity index 100% rename from archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/effects.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/effects.py diff --git a/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/events.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/events.py similarity index 100% rename from archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/events.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/events.py diff --git a/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/reads.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/reads.py similarity index 100% rename from archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/reads.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/reads.py diff --git a/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/states.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py similarity index 100% rename from archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/states.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py diff --git a/archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/writes.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/writes.py similarity index 100% rename from archivebox/vendor/abx-spec-archivebox/abx_spec_archivebox/writes.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/writes.py diff --git a/archivebox/vendor/abx-spec-archivebox/pyproject.toml b/archivebox/pkgs/abx-spec-archivebox/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-spec-archivebox/pyproject.toml rename to archivebox/pkgs/abx-spec-archivebox/pyproject.toml diff --git a/archivebox/vendor/abx-spec-config/README.md 
b/archivebox/pkgs/abx-spec-config/README.md similarity index 100% rename from archivebox/vendor/abx-spec-config/README.md rename to archivebox/pkgs/abx-spec-config/README.md diff --git a/archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py similarity index 100% rename from archivebox/vendor/abx-spec-config/abx_spec_config/__init__.py rename to archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py diff --git a/archivebox/vendor/abx-spec-config/abx_spec_config/base_configset.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py similarity index 100% rename from archivebox/vendor/abx-spec-config/abx_spec_config/base_configset.py rename to archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py diff --git a/archivebox/vendor/abx-spec-config/abx_spec_config/toml_util.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/toml_util.py similarity index 100% rename from archivebox/vendor/abx-spec-config/abx_spec_config/toml_util.py rename to archivebox/pkgs/abx-spec-config/abx_spec_config/toml_util.py diff --git a/archivebox/vendor/abx-spec-config/pyproject.toml b/archivebox/pkgs/abx-spec-config/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-spec-config/pyproject.toml rename to archivebox/pkgs/abx-spec-config/pyproject.toml diff --git a/archivebox/vendor/abx-spec-django/README.md b/archivebox/pkgs/abx-spec-django/README.md similarity index 100% rename from archivebox/vendor/abx-spec-django/README.md rename to archivebox/pkgs/abx-spec-django/README.md diff --git a/archivebox/vendor/abx-spec-django/abx_spec_django.py b/archivebox/pkgs/abx-spec-django/abx_spec_django.py similarity index 100% rename from archivebox/vendor/abx-spec-django/abx_spec_django.py rename to archivebox/pkgs/abx-spec-django/abx_spec_django.py diff --git a/archivebox/vendor/abx-spec-django/pyproject.toml b/archivebox/pkgs/abx-spec-django/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-spec-django/pyproject.toml rename to archivebox/pkgs/abx-spec-django/pyproject.toml diff --git a/archivebox/vendor/abx-spec-extractor/README.md b/archivebox/pkgs/abx-spec-extractor/README.md similarity index 100% rename from archivebox/vendor/abx-spec-extractor/README.md rename to archivebox/pkgs/abx-spec-extractor/README.md diff --git a/archivebox/vendor/abx-spec-extractor/abx_spec_extractor.py b/archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py similarity index 100% rename from archivebox/vendor/abx-spec-extractor/abx_spec_extractor.py rename to archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py diff --git a/archivebox/vendor/abx-spec-extractor/pyproject.toml b/archivebox/pkgs/abx-spec-extractor/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-spec-extractor/pyproject.toml rename to archivebox/pkgs/abx-spec-extractor/pyproject.toml diff --git a/archivebox/vendor/abx-spec-pydantic-pkgr/README.md b/archivebox/pkgs/abx-spec-pydantic-pkgr/README.md similarity index 100% rename from archivebox/vendor/abx-spec-pydantic-pkgr/README.md rename to archivebox/pkgs/abx-spec-pydantic-pkgr/README.md diff --git a/archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py b/archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py similarity index 100% rename from archivebox/vendor/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py rename to archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py diff --git 
a/archivebox/vendor/abx-spec-pydantic-pkgr/pyproject.toml b/archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-spec-pydantic-pkgr/pyproject.toml rename to archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml diff --git a/archivebox/vendor/abx-spec-searchbackend/README.md b/archivebox/pkgs/abx-spec-searchbackend/README.md similarity index 100% rename from archivebox/vendor/abx-spec-searchbackend/README.md rename to archivebox/pkgs/abx-spec-searchbackend/README.md diff --git a/archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py b/archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py similarity index 100% rename from archivebox/vendor/abx-spec-searchbackend/abx_spec_searchbackend.py rename to archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py diff --git a/archivebox/vendor/abx-spec-searchbackend/pyproject.toml b/archivebox/pkgs/abx-spec-searchbackend/pyproject.toml similarity index 100% rename from archivebox/vendor/abx-spec-searchbackend/pyproject.toml rename to archivebox/pkgs/abx-spec-searchbackend/pyproject.toml diff --git a/archivebox/vendor/abx/README.md b/archivebox/pkgs/abx/README.md similarity index 100% rename from archivebox/vendor/abx/README.md rename to archivebox/pkgs/abx/README.md diff --git a/archivebox/vendor/abx/abx.py b/archivebox/pkgs/abx/abx.py similarity index 99% rename from archivebox/vendor/abx/abx.py rename to archivebox/pkgs/abx/abx.py index 990fe8e1..4b08e743 100644 --- a/archivebox/vendor/abx/abx.py +++ b/archivebox/pkgs/abx/abx.py @@ -262,6 +262,7 @@ def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo: # import the plugin module by its name if isinstance(plugin, str): module = importlib.import_module(plugin) + print('IMPORTED PLUGIN:', plugin) plugin = getattr(module, 'PLUGIN_SPEC', getattr(module, 'PLUGIN', module)) elif inspect.ismodule(plugin): module = plugin diff --git a/archivebox/vendor/abx/pyproject.toml b/archivebox/pkgs/abx/pyproject.toml similarity index 100% rename from archivebox/vendor/abx/pyproject.toml rename to archivebox/pkgs/abx/pyproject.toml diff --git a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py deleted file mode 100644 index e2e97a7c..00000000 --- a/archivebox/vendor/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -import sys -import importlib -from pathlib import Path - -VENDOR_DIR = Path(__file__).parent - -VENDORED_LIBS = [ - 'abx', - # 'pydantic-pkgr', -] - -# scan ./vendor and add all dirs present to list of available VENDORED_LIBS -for subdir in reversed(sorted(VENDOR_DIR.iterdir())): - if subdir.is_dir() and subdir.name not in VENDORED_LIBS and not subdir.name.startswith('_'): - VENDORED_LIBS.append(subdir.name) - - -def load_vendored_libs(): - """Add archivebox/vendor to sys.path and import all vendored libraries present within""" - if str(VENDOR_DIR) not in sys.path: - sys.path.append(str(VENDOR_DIR)) - - for lib_name in VENDORED_LIBS: - lib_dir = VENDOR_DIR / lib_name - assert lib_dir.is_dir(), f'Expected vendor libary {lib_name} could not be found in {lib_dir}' - - try: - lib = importlib.import_module(lib_name) - # print(f"Successfully imported lib from environment {lib_name}") - except ImportError: - sys.path.append(str(lib_dir)) - try: - lib = importlib.import_module(lib_name) - # print(f"Successfully imported lib from vendored fallback {lib_name}: {inspect.getfile(lib)}") - except ImportError as e: - print(f"Failed to import lib from environment or vendored fallback {lib_name}: {e}", file=sys.stderr) 
- sys.exit(1) - - diff --git a/archivebox/vendor/requirements.txt b/archivebox/vendor/requirements.txt deleted file mode 100644 index 43be87c2..00000000 --- a/archivebox/vendor/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# this folder contains vendored versions of these packages - -#atomicwrites==1.4.0 -#pocket==0.3.7 -pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7 -#django-taggit==1.3.0 -#base32-crockford==0.3.0 -pydantic-pkgr>=0.4.7 diff --git a/pyproject.toml b/pyproject.toml index 632cc166..7326daa0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ classifiers = [ dependencies = [ - # ... archivebox/vendor/* # see vendored libs here + # ... archivebox/pkgs/* # see vendored libs here ############# Django / Core Libraries ############# "setuptools>=74.1.0", "django>=5.1.1,<6.0", @@ -216,8 +216,8 @@ abx-plugin-htmltotext = { workspace = true } [tool.uv.workspace] -members = ["archivebox/vendor/*"] -exclude = ["archivebox/vendor/__pycache__"] +members = ["archivebox/pkgs/*"] +exclude = ["archivebox/pkgs/__pycache__"] [build-system] requires = ["pdm-backend"] From 30cd48c30dbed4783c08840cfbce3bed23c248b0 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 20:10:07 -0700 Subject: [PATCH 14/25] update lockfiles --- requirements.txt | 212 ++++++++++++++++++++++-- uv.lock | 408 ++++++++++++++++++++++++----------------------- 2 files changed, 402 insertions(+), 218 deletions(-) diff --git a/requirements.txt b/requirements.txt index db2a66f7..12dd9d0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,137 @@ # This file was autogenerated by uv via the following command: # uv pip compile pyproject.toml --all-extras -o requirements.txt +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx + # via + # archivebox (pyproject.toml) + # abx-plugin-archivedotorg + # abx-plugin-chrome + # abx-plugin-curl + # abx-plugin-default-binproviders + # abx-plugin-favicon + # abx-plugin-git + # abx-plugin-htmltotext + # abx-plugin-ldap-auth + # abx-plugin-mercury + # abx-plugin-npm + # abx-plugin-pip + # abx-plugin-playwright + # abx-plugin-puppeteer + # abx-plugin-readability + # abx-plugin-ripgrep-search + # abx-plugin-singlefile + # abx-plugin-sonic-search + # abx-plugin-sqlitefts-search + # abx-plugin-title + # abx-plugin-wget + # abx-plugin-ytdlp + # abx-spec-archivebox + # abx-spec-config + # abx-spec-django + # abx-spec-extractor + # abx-spec-pydantic-pkgr + # abx-spec-searchbackend +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-archivedotorg + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-chrome + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-curl + # via + # archivebox (pyproject.toml) + # abx-plugin-archivedotorg + # abx-plugin-favicon + # abx-plugin-title +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-default-binproviders + # via + # archivebox (pyproject.toml) + # abx-plugin-git + # abx-plugin-npm + # abx-plugin-pip +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-favicon + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-git + # via archivebox (pyproject.toml) +-e 
file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-htmltotext + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ldap-auth + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-mercury + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-npm + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-pip + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-playwright + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-puppeteer + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-readability + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ripgrep-search + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-singlefile + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sonic-search + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sqlitefts-search + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-title + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-wget + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ytdlp + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-archivebox + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-config + # via + # archivebox (pyproject.toml) + # abx-plugin-archivedotorg + # abx-plugin-chrome + # abx-plugin-curl + # abx-plugin-favicon + # abx-plugin-git + # abx-plugin-htmltotext + # abx-plugin-ldap-auth + # abx-plugin-mercury + # abx-plugin-npm + # abx-plugin-pip + # abx-plugin-playwright + # abx-plugin-puppeteer + # abx-plugin-readability + # abx-plugin-ripgrep-search + # abx-plugin-singlefile + # abx-plugin-sonic-search + # abx-plugin-sqlitefts-search + # abx-plugin-title + # abx-plugin-wget + # abx-plugin-ytdlp +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-django + # via + # archivebox (pyproject.toml) + # abx-plugin-ldap-auth +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-extractor + # via archivebox (pyproject.toml) +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-pydantic-pkgr + # via + # archivebox (pyproject.toml) + # abx-plugin-chrome + # abx-plugin-curl + # abx-plugin-default-binproviders + # abx-plugin-git + # abx-plugin-npm + 
# abx-plugin-pip + # abx-plugin-playwright + # abx-plugin-puppeteer + # abx-plugin-singlefile + # abx-plugin-sonic-search + # abx-plugin-wget + # abx-plugin-ytdlp +-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-searchbackend + # via + # archivebox (pyproject.toml) + # abx-plugin-ripgrep-search + # abx-plugin-sonic-search + # abx-plugin-sqlitefts-search annotated-types==0.7.0 # via pydantic anyio==4.6.2.post1 @@ -29,7 +161,7 @@ beautifulsoup4==4.12.3 # via python-benedict brotli==1.1.0 # via yt-dlp -bx-django-utils==79 +bx-django-utils==81 # via django-huey-monitor bx-py-utils==104 # via @@ -49,7 +181,7 @@ charset-normalizer==3.4.0 # via requests constantly==23.10.4 # via twisted -croniter==3.0.3 +croniter==4.0.0 # via archivebox (pyproject.toml) cryptography==43.0.3 # via @@ -62,15 +194,22 @@ daphne==4.1.2 dateparser==1.2.0 # via archivebox (pyproject.toml) decorator==5.1.1 - # via ipython + # via + # ipdb + # ipython django==5.1.2 # via # archivebox (pyproject.toml) + # abx + # abx-plugin-pip + # abx-spec-archivebox + # abx-spec-django # bx-django-utils # channels # django-admin-data-views # django-auth-ldap # django-charid-field + # django-debug-toolbar # django-extensions # django-huey # django-huey-monitor @@ -81,12 +220,15 @@ django==5.1.2 # django-stubs # django-stubs-ext # django-taggit + # requests-tracker django-admin-data-views==0.4.1 # via archivebox (pyproject.toml) django-auth-ldap==5.1.0 # via archivebox (pyproject.toml) django-charid-field==0.4 # via archivebox (pyproject.toml) +django-debug-toolbar==4.4.6 + # via archivebox (pyproject.toml) django-extensions==3.2.3 # via archivebox (pyproject.toml) django-huey==1.2.1 @@ -107,19 +249,21 @@ django-settings-holder==0.1.2 # django-signal-webhooks django-signal-webhooks==0.3.0 # via archivebox (pyproject.toml) -django-stubs==5.1.0 +django-stubs==5.1.1 # via archivebox (pyproject.toml) -django-stubs-ext==5.1.0 +django-stubs-ext==5.1.1 # via django-stubs django-taggit==6.1.0 # via archivebox (pyproject.toml) -et-xmlfile==1.1.0 +djdt-flamegraph==0.2.13 + # via archivebox (pyproject.toml) +et-xmlfile==2.0.0 # via openpyxl executing==2.1.0 # via stack-data feedparser==6.0.11 # via archivebox (pyproject.toml) -ftfy==6.3.0 +ftfy==6.3.1 # via python-benedict h11==0.14.0 # via httpcore @@ -144,8 +288,12 @@ idna==3.10 # twisted incremental==24.7.2 # via twisted -ipython==8.28.0 +ipdb==0.13.13 # via archivebox (pyproject.toml) +ipython==8.29.0 + # via + # archivebox (pyproject.toml) + # ipdb jedi==0.19.1 # via ipython mailchecker==6.0.11 @@ -169,8 +317,14 @@ pexpect==4.9.0 phonenumbers==8.13.48 # via python-benedict platformdirs==4.3.6 - # via pydantic-pkgr + # via + # archivebox (pyproject.toml) + # pydantic-pkgr pluggy==1.5.0 + # via + # archivebox (pyproject.toml) + # abx +pocket==0.3.6 # via archivebox (pyproject.toml) prompt-toolkit==3.0.48 # via ipython @@ -197,6 +351,10 @@ pycryptodomex==3.21.0 # via yt-dlp pydantic==2.9.2 # via + # abx-plugin-playwright + # abx-spec-config + # abx-spec-extractor + # abx-spec-searchbackend # django-ninja # django-pydantic-field # pydantic-pkgr @@ -206,9 +364,21 @@ pydantic-core==2.23.4 # pydantic # pydantic-pkgr pydantic-pkgr==0.5.4 - # via archivebox (pyproject.toml) + # via + # archivebox (pyproject.toml) + # abx-plugin-default-binproviders + # abx-plugin-npm + # abx-plugin-pip + # abx-plugin-playwright + # abx-plugin-puppeteer + # abx-plugin-singlefile + # abx-plugin-sonic-search + # abx-plugin-ytdlp + # abx-spec-pydantic-pkgr 
pydantic-settings==2.6.0 - # via archivebox (pyproject.toml) + # via + # archivebox (pyproject.toml) + # abx-spec-config pygments==2.18.0 # via # ipython @@ -216,7 +386,11 @@ pygments==2.18.0 pyopenssl==24.2.1 # via twisted python-benedict==0.34.0 - # via archivebox (pyproject.toml) + # via + # archivebox (pyproject.toml) + # abx-spec-config + # abx-spec-extractor + # abx-spec-searchbackend python-crontab==3.2.0 # via archivebox (pyproject.toml) python-dateutil==2.9.0.post0 @@ -248,15 +422,19 @@ regex==2024.9.11 requests==2.32.3 # via # archivebox (pyproject.toml) + # pocket # python-benedict # yt-dlp +requests-tracker==0.3.3 + # via archivebox (pyproject.toml) rich==13.9.3 # via # archivebox (pyproject.toml) + # abx-spec-config # rich-argparse rich-argparse==1.5.2 # via archivebox (pyproject.toml) -service-identity==24.1.0 +service-identity==24.2.0 # via twisted setuptools==75.2.0 # via @@ -280,7 +458,10 @@ sonic-client==1.0.0 soupsieve==2.6 # via beautifulsoup4 sqlparse==0.5.1 - # via django + # via + # django + # django-debug-toolbar + # requests-tracker stack-data==0.6.3 # via ipython supervisor==4.2.5 @@ -293,7 +474,7 @@ traitlets==5.14.3 # via # ipython # matplotlib-inline -twisted==24.7.0 +twisted==24.10.0 # via daphne txaio==23.1.1 # via autobahn @@ -303,6 +484,7 @@ types-pyyaml==6.0.12.20240917 # via django-stubs typing-extensions==4.12.2 # via + # archivebox (pyproject.toml) # django-pydantic-field # django-stubs # django-stubs-ext diff --git a/uv.lock b/uv.lock index 349d5bd2..e2f86cf6 100644 --- a/uv.lock +++ b/uv.lock @@ -44,7 +44,7 @@ members = [ [[package]] name = "abx" version = "0.1.0" -source = { editable = "archivebox/vendor/abx" } +source = { editable = "archivebox/pkgs/abx" } dependencies = [ { name = "django" }, { name = "pluggy" }, @@ -59,7 +59,7 @@ requires-dist = [ [[package]] name = "abx-plugin-archivedotorg" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-archivedotorg" } +source = { editable = "archivebox/pkgs/abx-plugin-archivedotorg" } dependencies = [ { name = "abx" }, { name = "abx-plugin-curl" }, @@ -68,15 +68,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-plugin-curl", editable = "archivebox/vendor/abx-plugin-curl" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, ] [[package]] name = "abx-plugin-chrome" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-chrome" } +source = { editable = "archivebox/pkgs/abx-plugin-chrome" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -85,15 +85,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, ] [[package]] name = "abx-plugin-curl" version = "2024.10.24" -source = { editable = "archivebox/vendor/abx-plugin-curl" } +source = { editable = "archivebox/pkgs/abx-plugin-curl" 
} dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -102,15 +102,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, ] [[package]] name = "abx-plugin-default-binproviders" version = "2024.10.24" -source = { editable = "archivebox/vendor/abx-plugin-default-binproviders" } +source = { editable = "archivebox/pkgs/abx-plugin-default-binproviders" } dependencies = [ { name = "abx" }, { name = "abx-spec-pydantic-pkgr" }, @@ -119,15 +119,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] name = "abx-plugin-favicon" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-favicon" } +source = { editable = "archivebox/pkgs/abx-plugin-favicon" } dependencies = [ { name = "abx" }, { name = "abx-plugin-curl" }, @@ -136,15 +136,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-plugin-curl", editable = "archivebox/vendor/abx-plugin-curl" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, ] [[package]] name = "abx-plugin-git" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-git" } +source = { editable = "archivebox/pkgs/abx-plugin-git" } dependencies = [ { name = "abx" }, { name = "abx-plugin-default-binproviders" }, @@ -154,16 +154,16 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, ] [[package]] name = "abx-plugin-htmltotext" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-htmltotext" } +source = { editable = "archivebox/pkgs/abx-plugin-htmltotext" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -171,14 +171,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = 
"archivebox/vendor/abx-spec-config" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, ] [[package]] name = "abx-plugin-ldap-auth" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-ldap-auth" } +source = { editable = "archivebox/pkgs/abx-plugin-ldap-auth" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -187,15 +187,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-django", editable = "archivebox/vendor/abx-spec-django" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-django", editable = "archivebox/pkgs/abx-spec-django" }, ] [[package]] name = "abx-plugin-mercury" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-mercury" } +source = { editable = "archivebox/pkgs/abx-plugin-mercury" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -203,14 +203,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, ] [[package]] name = "abx-plugin-npm" version = "2024.10.24" -source = { editable = "archivebox/vendor/abx-plugin-npm" } +source = { editable = "archivebox/pkgs/abx-plugin-npm" } dependencies = [ { name = "abx" }, { name = "abx-plugin-default-binproviders" }, @@ -221,17 +221,17 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] name = "abx-plugin-pip" version = "2024.10.24" -source = { editable = "archivebox/vendor/abx-plugin-pip" } +source = { editable = "archivebox/pkgs/abx-plugin-pip" } dependencies = [ { name = "abx" }, { name = "abx-plugin-default-binproviders" }, @@ -243,10 +243,10 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", 
editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, { name = "django", specifier = ">=5.0.0" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] @@ -254,7 +254,7 @@ requires-dist = [ [[package]] name = "abx-plugin-playwright" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-playwright" } +source = { editable = "archivebox/pkgs/abx-plugin-playwright" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -265,9 +265,9 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, { name = "pydantic", specifier = ">=2.4.2" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] @@ -275,7 +275,7 @@ requires-dist = [ [[package]] name = "abx-plugin-pocket" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-pocket" } +source = { editable = "archivebox/pkgs/abx-plugin-pocket" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -284,15 +284,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, { name = "pocket", specifier = ">=0.3.6" }, ] [[package]] name = "abx-plugin-puppeteer" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-puppeteer" } +source = { editable = "archivebox/pkgs/abx-plugin-puppeteer" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -302,16 +302,16 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] name = "abx-plugin-readability" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-readability" } +source = { editable = "archivebox/pkgs/abx-plugin-readability" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -319,14 +319,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, ] [[package]] name = "abx-plugin-readwise" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-readwise" } +source = { editable = "archivebox/pkgs/abx-plugin-readwise" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -334,14 +334,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { 
name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, ] [[package]] name = "abx-plugin-ripgrep-search" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-ripgrep-search" } +source = { editable = "archivebox/pkgs/abx-plugin-ripgrep-search" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -350,15 +350,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" }, ] [[package]] name = "abx-plugin-singlefile" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-singlefile" } +source = { editable = "archivebox/pkgs/abx-plugin-singlefile" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -368,16 +368,16 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] name = "abx-plugin-sonic-search" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-sonic-search" } +source = { editable = "archivebox/pkgs/abx-plugin-sonic-search" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -388,17 +388,17 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, - { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] name = "abx-plugin-sqlitefts-search" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-sqlitefts-search" } +source = { editable = "archivebox/pkgs/abx-plugin-sqlitefts-search" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -407,15 +407,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, + { name = 
"abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" }, ] [[package]] name = "abx-plugin-title" version = "2024.10.27" -source = { editable = "archivebox/vendor/abx-plugin-title" } +source = { editable = "archivebox/pkgs/abx-plugin-title" } dependencies = [ { name = "abx" }, { name = "abx-plugin-curl" }, @@ -424,15 +424,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-plugin-curl", editable = "archivebox/vendor/abx-plugin-curl" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, ] [[package]] name = "abx-plugin-wget" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-wget" } +source = { editable = "archivebox/pkgs/abx-plugin-wget" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -441,15 +441,15 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, ] [[package]] name = "abx-plugin-ytdlp" version = "2024.10.28" -source = { editable = "archivebox/vendor/abx-plugin-ytdlp" } +source = { editable = "archivebox/pkgs/abx-plugin-ytdlp" } dependencies = [ { name = "abx" }, { name = "abx-spec-config" }, @@ -459,16 +459,16 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] name = "abx-spec-archivebox" version = "0.1.0" -source = { editable = "archivebox/vendor/abx-spec-archivebox" } +source = { editable = "archivebox/pkgs/abx-spec-archivebox" } dependencies = [ { name = "abx" }, { name = "django" }, @@ -476,14 +476,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, { name = "django", specifier = ">=5.1.1,<6.0" }, ] [[package]] name = "abx-spec-config" version = "0.1.0" -source = { editable = "archivebox/vendor/abx-spec-config" } +source = { editable = "archivebox/pkgs/abx-spec-config" } dependencies = [ { name = "abx" }, { name = "pydantic" }, @@ -494,7 +494,7 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, { name = "pydantic", specifier = ">=2.9.2" }, { name = "pydantic-settings", 
specifier = ">=2.6.0" }, { name = "python-benedict", specifier = ">=0.34.0" }, @@ -504,7 +504,7 @@ requires-dist = [ [[package]] name = "abx-spec-django" version = "0.1.0" -source = { editable = "archivebox/vendor/abx-spec-django" } +source = { editable = "archivebox/pkgs/abx-spec-django" } dependencies = [ { name = "abx" }, { name = "django" }, @@ -512,14 +512,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, { name = "django", specifier = ">=5.1.1,<6.0" }, ] [[package]] name = "abx-spec-extractor" version = "0.1.0" -source = { editable = "archivebox/vendor/abx-spec-extractor" } +source = { editable = "archivebox/pkgs/abx-spec-extractor" } dependencies = [ { name = "abx" }, { name = "pydantic" }, @@ -528,7 +528,7 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, { name = "pydantic", specifier = ">=2.5.0" }, { name = "python-benedict", specifier = ">=0.26.0" }, ] @@ -536,7 +536,7 @@ requires-dist = [ [[package]] name = "abx-spec-pydantic-pkgr" version = "0.1.0" -source = { editable = "archivebox/vendor/abx-spec-pydantic-pkgr" } +source = { editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" } dependencies = [ { name = "abx" }, { name = "pydantic-pkgr" }, @@ -544,14 +544,14 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, { name = "pydantic-pkgr", specifier = ">=0.5.4" }, ] [[package]] name = "abx-spec-searchbackend" version = "0.1.0" -source = { editable = "archivebox/vendor/abx-spec-searchbackend" } +source = { editable = "archivebox/pkgs/abx-spec-searchbackend" } dependencies = [ { name = "abx" }, { name = "pydantic" }, @@ -560,7 +560,7 @@ dependencies = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, { name = "pydantic", specifier = ">=2.5.0" }, { name = "python-benedict", specifier = ">=0.26.0" }, ] @@ -600,7 +600,7 @@ wheels = [ [[package]] name = "archivebox" -version = "0.8.5rc53" +version = "0.8.6rc0" source = { editable = "." 
} dependencies = [ { name = "abx" }, @@ -722,34 +722,34 @@ dev = [ [package.metadata] requires-dist = [ - { name = "abx", editable = "archivebox/vendor/abx" }, - { name = "abx-plugin-archivedotorg", editable = "archivebox/vendor/abx-plugin-archivedotorg" }, - { name = "abx-plugin-chrome", editable = "archivebox/vendor/abx-plugin-chrome" }, - { name = "abx-plugin-curl", editable = "archivebox/vendor/abx-plugin-curl" }, - { name = "abx-plugin-default-binproviders", editable = "archivebox/vendor/abx-plugin-default-binproviders" }, - { name = "abx-plugin-favicon", editable = "archivebox/vendor/abx-plugin-favicon" }, - { name = "abx-plugin-git", editable = "archivebox/vendor/abx-plugin-git" }, - { name = "abx-plugin-htmltotext", editable = "archivebox/vendor/abx-plugin-htmltotext" }, - { name = "abx-plugin-ldap-auth", editable = "archivebox/vendor/abx-plugin-ldap-auth" }, - { name = "abx-plugin-mercury", editable = "archivebox/vendor/abx-plugin-mercury" }, - { name = "abx-plugin-npm", editable = "archivebox/vendor/abx-plugin-npm" }, - { name = "abx-plugin-pip", editable = "archivebox/vendor/abx-plugin-pip" }, - { name = "abx-plugin-playwright", editable = "archivebox/vendor/abx-plugin-playwright" }, - { name = "abx-plugin-puppeteer", editable = "archivebox/vendor/abx-plugin-puppeteer" }, - { name = "abx-plugin-readability", editable = "archivebox/vendor/abx-plugin-readability" }, - { name = "abx-plugin-ripgrep-search", editable = "archivebox/vendor/abx-plugin-ripgrep-search" }, - { name = "abx-plugin-singlefile", editable = "archivebox/vendor/abx-plugin-singlefile" }, - { name = "abx-plugin-sonic-search", editable = "archivebox/vendor/abx-plugin-sonic-search" }, - { name = "abx-plugin-sqlitefts-search", editable = "archivebox/vendor/abx-plugin-sqlitefts-search" }, - { name = "abx-plugin-title", editable = "archivebox/vendor/abx-plugin-title" }, - { name = "abx-plugin-wget", editable = "archivebox/vendor/abx-plugin-wget" }, - { name = "abx-plugin-ytdlp", editable = "archivebox/vendor/abx-plugin-ytdlp" }, - { name = "abx-spec-archivebox", editable = "archivebox/vendor/abx-spec-archivebox" }, - { name = "abx-spec-config", editable = "archivebox/vendor/abx-spec-config" }, - { name = "abx-spec-django", editable = "archivebox/vendor/abx-spec-django" }, - { name = "abx-spec-extractor", editable = "archivebox/vendor/abx-spec-extractor" }, - { name = "abx-spec-pydantic-pkgr", editable = "archivebox/vendor/abx-spec-pydantic-pkgr" }, - { name = "abx-spec-searchbackend", editable = "archivebox/vendor/abx-spec-searchbackend" }, + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-archivedotorg", editable = "archivebox/pkgs/abx-plugin-archivedotorg" }, + { name = "abx-plugin-chrome", editable = "archivebox/pkgs/abx-plugin-chrome" }, + { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" }, + { name = "abx-plugin-favicon", editable = "archivebox/pkgs/abx-plugin-favicon" }, + { name = "abx-plugin-git", editable = "archivebox/pkgs/abx-plugin-git" }, + { name = "abx-plugin-htmltotext", editable = "archivebox/pkgs/abx-plugin-htmltotext" }, + { name = "abx-plugin-ldap-auth", editable = "archivebox/pkgs/abx-plugin-ldap-auth" }, + { name = "abx-plugin-mercury", editable = "archivebox/pkgs/abx-plugin-mercury" }, + { name = "abx-plugin-npm", editable = "archivebox/pkgs/abx-plugin-npm" }, + { name = "abx-plugin-pip", editable = "archivebox/pkgs/abx-plugin-pip" }, + { 
name = "abx-plugin-playwright", editable = "archivebox/pkgs/abx-plugin-playwright" }, + { name = "abx-plugin-puppeteer", editable = "archivebox/pkgs/abx-plugin-puppeteer" }, + { name = "abx-plugin-readability", editable = "archivebox/pkgs/abx-plugin-readability" }, + { name = "abx-plugin-ripgrep-search", editable = "archivebox/pkgs/abx-plugin-ripgrep-search" }, + { name = "abx-plugin-singlefile", editable = "archivebox/pkgs/abx-plugin-singlefile" }, + { name = "abx-plugin-sonic-search", editable = "archivebox/pkgs/abx-plugin-sonic-search" }, + { name = "abx-plugin-sqlitefts-search", editable = "archivebox/pkgs/abx-plugin-sqlitefts-search" }, + { name = "abx-plugin-title", editable = "archivebox/pkgs/abx-plugin-title" }, + { name = "abx-plugin-wget", editable = "archivebox/pkgs/abx-plugin-wget" }, + { name = "abx-plugin-ytdlp", editable = "archivebox/pkgs/abx-plugin-ytdlp" }, + { name = "abx-spec-archivebox", editable = "archivebox/pkgs/abx-spec-archivebox" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-django", editable = "archivebox/pkgs/abx-spec-django" }, + { name = "abx-spec-extractor", editable = "archivebox/pkgs/abx-spec-extractor" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" }, { name = "archivebox", extras = ["sonic", "ldap", "debug"], marker = "extra == 'all'" }, { name = "atomicwrites", specifier = "==1.4.1" }, { name = "base32-crockford", specifier = "==0.3.0" }, @@ -1022,16 +1022,16 @@ wheels = [ [[package]] name = "bx-django-utils" -version = "79" +version = "81" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "bx-py-utils" }, { name = "django" }, { name = "python-stdnum" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/8e/d56ffeb8f39c176b03439f614526b0b7de2d298bbf3250d6fdd40521cc51/bx_django_utils-79.tar.gz", hash = "sha256:cb66087d4e9396281acf5a4394b749cff3062b66082d5726f6a8a342fdd35d0e", size = 190245 } +sdist = { url = "https://files.pythonhosted.org/packages/e7/4a/a4087420852629abd835a17f7d41eca9efa93453c6dcaa29697f40195021/bx_django_utils-81.tar.gz", hash = "sha256:0896f53d737ddda3e98085803e9f469abc4b84561d4062ec13aa40b14e9453b8", size = 192245 } wheels = [ - { url = "https://files.pythonhosted.org/packages/21/a1/dc24b907e2671512826d3c6593f79e4f78f8fc85544fbbf54102bacc08c9/bx_django_utils-79-py3-none-any.whl", hash = "sha256:d50b10ace24b0b363574542faecf04a81029e2fec6d6e6525fe063ed06238e04", size = 199326 }, + { url = "https://files.pythonhosted.org/packages/28/8e/692dce1f10303c6f4a03f5c2ae646d36b555c6190f17e11a2a469f9bdc48/bx_django_utils-81-py3-none-any.whl", hash = "sha256:b7ca9a801f0a160fd68c5744b7449552a3029484c373b8aaa2f41d0d50431b51", size = 199480 }, ] [[package]] @@ -1225,15 +1225,15 @@ wheels = [ [[package]] name = "croniter" -version = "3.0.3" +version = "4.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "python-dateutil" }, { name = "pytz" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/44/7a/14b0b14ab0203e2c79493cf487829dc294d5c44bedc810ab2f4a97fc9ff4/croniter-3.0.3.tar.gz", hash = "sha256:34117ec1741f10a7bd0ec3ad7d8f0eb8fa457a2feb9be32e6a2250e158957668", size = 53088 } +sdist = { url = "https://files.pythonhosted.org/packages/c7/7c/ad5d5ecca499c00ac83c08fde57a49af929844b9c39d6256cc3fea30c940/croniter-4.0.0.tar.gz", hash = 
"sha256:3df735b70d005b68414d80728987ecba4d71054a1bb1b011f6d475bdefdbeddd", size = 55312 } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/6a/f2f68e0f9cf702b6d055ab53cab0d8c100f04e86228ca500a8ca9de94b58/croniter-3.0.3-py2.py3-none-any.whl", hash = "sha256:b3bd11f270dc54ccd1f2397b813436015a86d30ffc5a7a9438eec1ed916f2101", size = 22422 }, + { url = "https://files.pythonhosted.org/packages/4e/a5/b4cbef8a130382a84fc9342c2d1b2551dcf2817e768bb3e2881b71617119/croniter-4.0.0-py2.py3-none-any.whl", hash = "sha256:312717b8017ad6052817934af563f6754b1f25cb942dcca1e617240aca4aa254", size = 23411 }, ] [[package]] @@ -1513,7 +1513,7 @@ wheels = [ [[package]] name = "django-stubs" -version = "5.1.0" +version = "5.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "asgiref" }, @@ -1523,22 +1523,22 @@ dependencies = [ { name = "types-pyyaml" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/47/35/fa62c35c48e77bc4dabfe56d16786a2e9965ff89c4c55ab909c2d9f00ce8/django_stubs-5.1.0.tar.gz", hash = "sha256:86128c228b65e6c9a85e5dc56eb1c6f41125917dae0e21e6cfecdf1b27e630c5", size = 265839 } +sdist = { url = "https://files.pythonhosted.org/packages/bf/60/1ae90eb6e2e107bc64a3de9de78a5add7f3b85e491113504eed38d6d2c63/django_stubs-5.1.1.tar.gz", hash = "sha256:126d354bbdff4906c4e93e6361197f6fbfb6231c3df6def85a291dae6f9f577b", size = 265624 } wheels = [ - { url = "https://files.pythonhosted.org/packages/1c/d8/4561cf32a652f12d1f6edf27ac1ed6194540b44592cc85ead62a1f6fdff6/django_stubs-5.1.0-py3-none-any.whl", hash = "sha256:b98d49a80aa4adf1433a97407102d068de26c739c405431d93faad96dd282c40", size = 470607 }, + { url = "https://files.pythonhosted.org/packages/98/c8/3081d5f994351248fcd60f9aab10cb2020bdd7df0f14e80854373e15d7d4/django_stubs-5.1.1-py3-none-any.whl", hash = "sha256:c4dc64260bd72e6d32b9e536e8dd0d9247922f0271f82d1d5132a18f24b388ac", size = 470790 }, ] [[package]] name = "django-stubs-ext" -version = "5.1.0" +version = "5.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "django" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/a5/dfb120bf3ce1f0da109481605f704ffe72533f056f42e8cffd5a486504a7/django_stubs_ext-5.1.0.tar.gz", hash = "sha256:ed7d51c0b731651879fc75f331fb0806d98b67bfab464e96e2724db6b46ef926", size = 9491 } +sdist = { url = "https://files.pythonhosted.org/packages/ca/62/a7129909d3c94eac957c02eeb05ac57cbca81db4f3f6270a8503697f376a/django_stubs_ext-5.1.1.tar.gz", hash = "sha256:db7364e4f50ae7e5360993dbd58a3a57ea4b2e7e5bab0fbd525ccdb3e7975d1c", size = 9455 } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/03/20a5a19d1b8d26eddd8420261304ee9e6accd802f5332e360daaa2202afb/django_stubs_ext-5.1.0-py3-none-any.whl", hash = "sha256:a455fc222c90b30b29ad8c53319559f5b54a99b4197205ddbb385aede03b395d", size = 8966 }, + { url = "https://files.pythonhosted.org/packages/6a/ed/f79ae5ad993bdf900d61892d2a9fc0145441a507a7579890fb8e21e4a7bc/django_stubs_ext-5.1.1-py3-none-any.whl", hash = "sha256:3907f99e178c93323e2ce908aef8352adb8c047605161f8d9e5e7b4efb5a6a9c", size = 8965 }, ] [[package]] @@ -1573,11 +1573,11 @@ wheels = [ [[package]] name = "et-xmlfile" -version = "1.1.0" +version = "2.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3d/5d/0413a31d184a20c763ad741cc7852a659bf15094c24840c5bdd1754765cd/et_xmlfile-1.1.0.tar.gz", hash = 
"sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c", size = 3218 } +sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 } wheels = [ - { url = "https://files.pythonhosted.org/packages/96/c2/3dd434b0108730014f1b96fd286040dc3bcb70066346f7e01ec2ac95865f/et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada", size = 4688 }, + { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 }, ] [[package]] @@ -1626,14 +1626,14 @@ wheels = [ [[package]] name = "ftfy" -version = "6.3.0" +version = "6.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wcwidth" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/85/c3/63753eca4c5257ce0561cb5f8e9cd0d45d97848c73c56e33a0a764319e5b/ftfy-6.3.0.tar.gz", hash = "sha256:1c7d6418e72b25a7760feb150acf574b86924dbb2e95b32c0b3abbd1ba3d7ad6", size = 362118 } +sdist = { url = "https://files.pythonhosted.org/packages/a5/d3/8650919bc3c7c6e90ee3fa7fd618bf373cbbe55dff043bd67353dbb20cd8/ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec", size = 308927 } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/0f/d8a8152e720cbcad890e56ee98639ff489f1992869b4cf304c3fa24d4bcc/ftfy-6.3.0-py3-none-any.whl", hash = "sha256:17aca296801f44142e3ff2c16f93fbf6a87609ebb3704a9a41dd5d4903396caf", size = 44778 }, + { url = "https://files.pythonhosted.org/packages/ab/6e/81d47999aebc1b155f81eca4477a616a70f238a2549848c38983f3c22a82/ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083", size = 44821 }, ] [[package]] @@ -1785,7 +1785,7 @@ wheels = [ [[package]] name = "ipython" -version = "8.28.0" +version = "8.29.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -1800,9 +1800,9 @@ dependencies = [ { name = "traitlets" }, { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f7/21/48db7d9dd622b9692575004c7c98f85f5629428f58596c59606d36c51b58/ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a", size = 5495762 } +sdist = { url = "https://files.pythonhosted.org/packages/85/e0/a3f36dde97e12121106807d80485423ae4c5b27ce60d40d4ab0bab18a9db/ipython-8.29.0.tar.gz", hash = "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb", size = 5497513 } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/3a/5d8680279ada9571de8469220069d27024ee47624af534e537c9ff49a450/ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35", size = 819456 }, + { url = "https://files.pythonhosted.org/packages/c5/a5/c15ed187f1b3fac445bb42a2dedd8dec1eee1718b35129242049a13a962f/ipython-8.29.0-py3-none-any.whl", hash = "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8", size = 819911 }, ] [[package]] @@ -1866,7 +1866,7 @@ wheels = [ [[package]] name = "logfire" -version = "1.2.0" +version = "1.3.1" source 
= { registry = "https://pypi.org/simple" } dependencies = [ { name = "executing" }, @@ -1878,9 +1878,9 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/78/85/ce3e4ecc59a0126eaa9802f7d928d6efa837e63619dfec37654fb2d1f1c1/logfire-1.2.0.tar.gz", hash = "sha256:71866c4ce2f604b307ff0cc1a9b1254ea68b3c46f42bffd6ac36fc4db5abb62b", size = 240418 } +sdist = { url = "https://files.pythonhosted.org/packages/66/41/7b563b7db2490ba9090edd0ba7b70ad9a003bfd608dc132edf5f1b394140/logfire-1.3.1.tar.gz", hash = "sha256:73f9ff2691c927fc3ad28da5308ffd43af30d9bb8efedef52266a797da878381", size = 242221 } wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/7f/37d9c3cbed1ef23b467c0c0039f35524595f8fd79f3acb54e647a0ccd590/logfire-1.2.0-py3-none-any.whl", hash = "sha256:edb2b441e418cf31877bd97e24b3755f873bb423f834cca66f315b25bde61ebd", size = 164724 }, + { url = "https://files.pythonhosted.org/packages/56/6e/2d1a1b116733e930e8a20e2263cc5a9968d51ef546cc473895c1b5252ee0/logfire-1.3.1-py3-none-any.whl", hash = "sha256:974657b9d775a65b5c526550baa95c121257a907ab5d9e8c99cbb715562c2673", size = 164833 }, ] [package.optional-dependencies] @@ -2273,11 +2273,11 @@ wheels = [ [[package]] name = "pip" -version = "24.2" +version = "24.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4d/87/fb90046e096a03aeab235e139436b3fe804cdd447ed2093b0d70eba3f7f8/pip-24.2.tar.gz", hash = "sha256:5b5e490b5e9cb275c879595064adce9ebd31b854e3e803740b72f9ccf34a45b8", size = 1922041 } +sdist = { url = "https://files.pythonhosted.org/packages/f4/b1/b422acd212ad7eedddaf7981eee6e5de085154ff726459cf2da7c5a184c1/pip-24.3.1.tar.gz", hash = "sha256:ebcb60557f2aefabc2e0f918751cd24ea0d56d8ec5445fe1807f1d2109660b99", size = 1931073 } wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/55/90db48d85f7689ec6f81c0db0622d704306c5284850383c090e6c7195a5c/pip-24.2-py3-none-any.whl", hash = "sha256:2cd581cf58ab7fcfca4ce8efa6dcacd0de5bf8d0a3eb9ec927e07405f4d9e2a2", size = 1815170 }, + { url = "https://files.pythonhosted.org/packages/ef/7d/500c9ad20238fcfcb4cb9243eede163594d7020ce87bd9610c9e02771876/pip-24.3.1-py3-none-any.whl", hash = "sha256:3790624780082365f47549d032f3770eeb2b1e8bd1f7b2e02dace1afa361b4ed", size = 1822182 }, ] [[package]] @@ -2339,6 +2339,8 @@ version = "6.1.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/26/10/2a30b13c61e7cf937f4adf90710776b7918ed0a9c434e2c38224732af310/psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a", size = 508565 } wheels = [ + { url = "https://files.pythonhosted.org/packages/da/2b/f4dea5d993d9cd22ad958eea828a41d5d225556123d372f02547c29c4f97/psutil-6.1.0-cp27-none-win32.whl", hash = "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e", size = 246648 }, + { url = "https://files.pythonhosted.org/packages/9f/14/4aa97a7f2e0ac33a050d990ab31686d651ae4ef8c86661fef067f00437b9/psutil-6.1.0-cp27-none-win_amd64.whl", hash = "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85", size = 249905 }, { url = "https://files.pythonhosted.org/packages/01/9e/8be43078a171381953cfee33c07c0d628594b5dbfc5157847b85022c2c1b/psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688", size = 247762 }, { url = 
"https://files.pythonhosted.org/packages/1d/cb/313e80644ea407f04f6602a9e23096540d9dc1878755f3952ea8d3d104be/psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e", size = 248777 }, { url = "https://files.pythonhosted.org/packages/65/8e/bcbe2025c587b5d703369b6a75b65d41d1367553da6e3f788aff91eaf5bd/psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38", size = 284259 }, @@ -2892,32 +2894,32 @@ wheels = [ [[package]] name = "ruff" -version = "0.7.0" +version = "0.7.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2c/c7/f3367d1da5d568192968c5c9e7f3d51fb317b9ac04828493b23d8fce8ce6/ruff-0.7.0.tar.gz", hash = "sha256:47a86360cf62d9cd53ebfb0b5eb0e882193fc191c6d717e8bef4462bc3b9ea2b", size = 3146645 } +sdist = { url = "https://files.pythonhosted.org/packages/a6/21/5c6e05e0fd3fbb41be4fb92edbc9a04de70baf60adb61435ce0c6b8c3d55/ruff-0.7.1.tar.gz", hash = "sha256:9d8a41d4aa2dad1575adb98a82870cf5db5f76b2938cf2206c22c940034a36f4", size = 3181670 } wheels = [ - { url = "https://files.pythonhosted.org/packages/48/59/a0275a0913f3539498d116046dd679cd657fe3b7caf5afe1733319414932/ruff-0.7.0-py3-none-linux_armv6l.whl", hash = "sha256:0cdf20c2b6ff98e37df47b2b0bd3a34aaa155f59a11182c1303cce79be715628", size = 10434007 }, - { url = "https://files.pythonhosted.org/packages/cd/94/da0ba5f956d04c90dd899209904210600009dcda039ce840d83eb4298c7d/ruff-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:496494d350c7fdeb36ca4ef1c9f21d80d182423718782222c29b3e72b3512737", size = 10048066 }, - { url = "https://files.pythonhosted.org/packages/57/1d/e5cc149ecc46e4f203403a79ccd170fad52d316f98b87d0f63b1945567db/ruff-0.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:214b88498684e20b6b2b8852c01d50f0651f3cc6118dfa113b4def9f14faaf06", size = 9711389 }, - { url = "https://files.pythonhosted.org/packages/05/67/fb7ea2c869c539725a16c5bc294e9aa34f8b1b6fe702f1d173a5da517c2b/ruff-0.7.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630fce3fefe9844e91ea5bbf7ceadab4f9981f42b704fae011bb8efcaf5d84be", size = 10755174 }, - { url = "https://files.pythonhosted.org/packages/5f/f0/13703bc50536a0613ea3dce991116e5f0917a1f05528c6ab738b33c08d3f/ruff-0.7.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:211d877674e9373d4bb0f1c80f97a0201c61bcd1e9d045b6e9726adc42c156aa", size = 10196040 }, - { url = "https://files.pythonhosted.org/packages/99/c1/77b04ab20324ab03d333522ee55fb0f1c38e3ca0d326b4905f82ce6b6c70/ruff-0.7.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:194d6c46c98c73949a106425ed40a576f52291c12bc21399eb8f13a0f7073495", size = 11033684 }, - { url = "https://files.pythonhosted.org/packages/f2/97/f463334dc4efeea3551cd109163df15561c18a1c3ec13d51643740fd36ba/ruff-0.7.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:82c2579b82b9973a110fab281860403b397c08c403de92de19568f32f7178598", size = 11803700 }, - { url = "https://files.pythonhosted.org/packages/b4/f8/a31d40c4bb92933d376a53e7c5d0245d9b27841357e4820e96d38f54b480/ruff-0.7.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9af971fe85dcd5eaed8f585ddbc6bdbe8c217fb8fcf510ea6bca5bdfff56040e", size = 11347848 }, - { url = 
"https://files.pythonhosted.org/packages/83/62/0c133b35ddaf91c65c30a56718b80bdef36bfffc35684d29e3a4878e0ea3/ruff-0.7.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b641c7f16939b7d24b7bfc0be4102c56562a18281f84f635604e8a6989948914", size = 12480632 }, - { url = "https://files.pythonhosted.org/packages/46/96/464058dd1d980014fb5aa0a1254e78799efb3096fc7a4823cd66a1621276/ruff-0.7.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d71672336e46b34e0c90a790afeac8a31954fd42872c1f6adaea1dff76fd44f9", size = 10941919 }, - { url = "https://files.pythonhosted.org/packages/a0/f7/bda37ec77986a435dde44e1f59374aebf4282a5fa9cf17735315b847141f/ruff-0.7.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ab7d98c7eed355166f367597e513a6c82408df4181a937628dbec79abb2a1fe4", size = 10745519 }, - { url = "https://files.pythonhosted.org/packages/c2/33/5f77fc317027c057b61a848020a47442a1cbf12e592df0e41e21f4d0f3bd/ruff-0.7.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1eb54986f770f49edb14f71d33312d79e00e629a57387382200b1ef12d6a4ef9", size = 10284872 }, - { url = "https://files.pythonhosted.org/packages/ff/50/98aec292bc9537f640b8d031c55f3414bf15b6ed13b3e943fed75ac927b9/ruff-0.7.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:dc452ba6f2bb9cf8726a84aa877061a2462afe9ae0ea1d411c53d226661c601d", size = 10600334 }, - { url = "https://files.pythonhosted.org/packages/f2/85/12607ae3201423a179b8cfadc7cb1e57d02cd0135e45bd0445acb4cef327/ruff-0.7.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:4b406c2dce5be9bad59f2de26139a86017a517e6bcd2688da515481c05a2cb11", size = 11017333 }, - { url = "https://files.pythonhosted.org/packages/d4/7f/3b85a56879e705d5f46ec14daf8a439fca05c3081720fe3dc3209100922d/ruff-0.7.0-py3-none-win32.whl", hash = "sha256:f6c968509f767776f524a8430426539587d5ec5c662f6addb6aa25bc2e8195ec", size = 8570962 }, - { url = "https://files.pythonhosted.org/packages/39/9f/c5ee2b40d377354dabcc23cff47eb299de4b4d06d345068f8f8cc1eadac8/ruff-0.7.0-py3-none-win_amd64.whl", hash = "sha256:ff4aabfbaaba880e85d394603b9e75d32b0693152e16fa659a3064a85df7fce2", size = 9365544 }, - { url = "https://files.pythonhosted.org/packages/89/8b/ee1509f60148cecba644aa718f6633216784302458340311898aaf0b1bed/ruff-0.7.0-py3-none-win_arm64.whl", hash = "sha256:10842f69c245e78d6adec7e1db0a7d9ddc2fff0621d730e61657b64fa36f207e", size = 8695763 }, + { url = "https://files.pythonhosted.org/packages/65/45/8a20a9920175c9c4892b2420f80ff3cf14949cf3067118e212f9acd9c908/ruff-0.7.1-py3-none-linux_armv6l.whl", hash = "sha256:cb1bc5ed9403daa7da05475d615739cc0212e861b7306f314379d958592aaa89", size = 10389268 }, + { url = "https://files.pythonhosted.org/packages/1b/d3/2f8382db2cf4f9488e938602e33e36287f9d26cb283aa31f11c31297ce79/ruff-0.7.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:27c1c52a8d199a257ff1e5582d078eab7145129aa02721815ca8fa4f9612dc35", size = 10188348 }, + { url = "https://files.pythonhosted.org/packages/a2/31/7d14e2a88da351200f844b7be889a0845d9e797162cf76b136d21b832a23/ruff-0.7.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:588a34e1ef2ea55b4ddfec26bbe76bc866e92523d8c6cdec5e8aceefeff02d99", size = 9841448 }, + { url = "https://files.pythonhosted.org/packages/db/99/738cafdc768eceeca0bd26c6f03e213aa91203d2278e1d95b1c31c4ece41/ruff-0.7.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94fc32f9cdf72dc75c451e5f072758b118ab8100727168a3df58502b43a599ca", size = 10674864 }, + { url = 
"https://files.pythonhosted.org/packages/fe/12/bcf2836b50eab53c65008383e7d55201e490d75167c474f14a16e1af47d2/ruff-0.7.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:985818742b833bffa543a84d1cc11b5e6871de1b4e0ac3060a59a2bae3969250", size = 10192105 }, + { url = "https://files.pythonhosted.org/packages/2b/71/261d5d668bf98b6c44e89bfb5dfa4cb8cb6c8b490a201a3d8030e136ea4f/ruff-0.7.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32f1e8a192e261366c702c5fb2ece9f68d26625f198a25c408861c16dc2dea9c", size = 11194144 }, + { url = "https://files.pythonhosted.org/packages/90/1f/0926d18a3b566fa6e7b3b36093088e4ffef6b6ba4ea85a462d9a93f7e35c/ruff-0.7.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:699085bf05819588551b11751eff33e9ca58b1b86a6843e1b082a7de40da1565", size = 11917066 }, + { url = "https://files.pythonhosted.org/packages/cd/a8/9fac41f128b6a44ab4409c1493430b4ee4b11521e8aeeca19bfe1ce851f9/ruff-0.7.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:344cc2b0814047dc8c3a8ff2cd1f3d808bb23c6658db830d25147339d9bf9ea7", size = 11458821 }, + { url = "https://files.pythonhosted.org/packages/25/cd/59644168f086ab13fe4e02943b9489a0aa710171f66b178e179df5383554/ruff-0.7.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4316bbf69d5a859cc937890c7ac7a6551252b6a01b1d2c97e8fc96e45a7c8b4a", size = 12700379 }, + { url = "https://files.pythonhosted.org/packages/fb/30/3bac63619eb97174661829c07fc46b2055a053dee72da29d7c304c1cd2c0/ruff-0.7.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:79d3af9dca4c56043e738a4d6dd1e9444b6d6c10598ac52d146e331eb155a8ad", size = 11019813 }, + { url = "https://files.pythonhosted.org/packages/4b/af/f567b885b5cb3bcdbcca3458ebf210cc8c9c7a9f61c332d3c2a050c3b21e/ruff-0.7.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:c5c121b46abde94a505175524e51891f829414e093cd8326d6e741ecfc0a9112", size = 10662146 }, + { url = "https://files.pythonhosted.org/packages/bc/ad/eb930d3ad117a9f2f7261969c21559ebd82bb13b6e8001c7caed0d44be5f/ruff-0.7.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8422104078324ea250886954e48f1373a8fe7de59283d747c3a7eca050b4e378", size = 10256911 }, + { url = "https://files.pythonhosted.org/packages/20/d5/af292ce70a016fcec792105ca67f768b403dd480a11888bc1f418fed0dd5/ruff-0.7.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:56aad830af8a9db644e80098fe4984a948e2b6fc2e73891538f43bbe478461b8", size = 10767488 }, + { url = "https://files.pythonhosted.org/packages/24/85/cc04a3bd027f433bebd2a097e63b3167653c079f7f13d8f9a1178e693412/ruff-0.7.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:658304f02f68d3a83c998ad8bf91f9b4f53e93e5412b8f2388359d55869727fd", size = 11093368 }, + { url = "https://files.pythonhosted.org/packages/0b/fb/c39cbf32d1f3e318674b8622f989417231794926b573f76dd4d0ca49f0f1/ruff-0.7.1-py3-none-win32.whl", hash = "sha256:b517a2011333eb7ce2d402652ecaa0ac1a30c114fbbd55c6b8ee466a7f600ee9", size = 8594180 }, + { url = "https://files.pythonhosted.org/packages/5a/71/ec8cdea34ecb90c830ca60d54ac7b509a7b5eab50fae27e001d4470fe813/ruff-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f38c41fcde1728736b4eb2b18850f6d1e3eedd9678c914dede554a70d5241307", size = 9419751 }, + { url = "https://files.pythonhosted.org/packages/79/7b/884553415e9f0a9bf358ed52fb68b934e67ef6c5a62397ace924a1afdf9a/ruff-0.7.1-py3-none-win_arm64.whl", hash = "sha256:19aa200ec824c0f36d0c9114c8ec0087082021732979a359d6f3c390a6ff2a37", size = 8717402 }, ] [[package]] name = 
"service-identity" -version = "24.1.0" +version = "24.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -2925,9 +2927,9 @@ dependencies = [ { name = "pyasn1" }, { name = "pyasn1-modules" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/d2/2ac20fd05f1b6fce31986536da4caeac51ed2e1bb25d4a7d73ca4eccdfab/service_identity-24.1.0.tar.gz", hash = "sha256:6829c9d62fb832c2e1c435629b0a8c476e1929881f28bee4d20bc24161009221", size = 40183 } +sdist = { url = "https://files.pythonhosted.org/packages/07/a5/dfc752b979067947261dbbf2543470c58efe735c3c1301dd870ef27830ee/service_identity-24.2.0.tar.gz", hash = "sha256:b8683ba13f0d39c6cd5d625d2c5f65421d6d707b013b375c355751557cbe8e09", size = 39245 } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/92/44669afe6354a7bed9968013862118c401690d8b5a805bab75ac1764845f/service_identity-24.1.0-py3-none-any.whl", hash = "sha256:a28caf8130c8a5c1c7a6f5293faaf239bbfb7751e4862436920ee6f2616f568a", size = 12037 }, + { url = "https://files.pythonhosted.org/packages/08/2c/ca6dd598b384bc1ce581e24aaae0f2bed4ccac57749d5c3befbb5e742081/service_identity-24.2.0-py3-none-any.whl", hash = "sha256:6b047fbd8a84fd0bb0d55ebce4031e400562b9196e1e0d3e0fe2b8a59f6d4a85", size = 11364 }, ] [[package]] @@ -3171,7 +3173,7 @@ wheels = [ [[package]] name = "twisted" -version = "24.7.0" +version = "24.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -3182,9 +3184,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "zope-interface" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8b/bf/f30eb89bcd14a21a36b4cd3d96658432d4c590af3c24bbe08ea77fa7bbbb/twisted-24.7.0.tar.gz", hash = "sha256:5a60147f044187a127ec7da96d170d49bcce50c6fd36f594e60f4587eff4d394", size = 3516844 } +sdist = { url = "https://files.pythonhosted.org/packages/b2/0f/2d0b0dcd52a849db64ff63619aead94ae1091fe4d4d7e100371efe513585/twisted-24.10.0.tar.gz", hash = "sha256:02951299672595fea0f70fa2d5f7b5e3d56836157eda68859a6ad6492d36756e", size = 3525999 } wheels = [ - { url = "https://files.pythonhosted.org/packages/49/d2/7b3e869b983fbf29d770fc2893f8df7c1739c6ff03a2b926b4fc43e4263e/twisted-24.7.0-py3-none-any.whl", hash = "sha256:734832ef98108136e222b5230075b1079dad8a3fc5637319615619a7725b0c81", size = 3181556 }, + { url = "https://files.pythonhosted.org/packages/f9/7c/f80f6853d702782edb357190c42c3973f13c547a5f68ab1b17e6415061b8/twisted-24.10.0-py3-none-any.whl", hash = "sha256:67aa7c8aa94387385302acf44ade12967c747858c8bcce0f11d38077a11c5326", size = 3188753 }, ] [package.optional-dependencies] @@ -3283,27 +3285,27 @@ wheels = [ [[package]] name = "uv" -version = "0.4.26" +version = "0.4.28" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/cb/90/500da91a6d2fdad8060d27b0c2dd948bb807a7cfc5fe32abc90dfaeb363f/uv-0.4.26.tar.gz", hash = "sha256:e9f45d8765a037a13ddedebb9e36fdcf06b7957654cfa8055d84f19eba12957e", size = 2072287 } +sdist = { url = "https://files.pythonhosted.org/packages/67/5f/4c227d6ca86c1b27a27d953b0f6f7d867d16bf84802dbb2e03dacbcff629/uv-0.4.28.tar.gz", hash = "sha256:bc33e318b676aeba2ea8bcd1e8f38623272b891200cefc54f9c420f4f4091434", size = 2093199 } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/1f/1e1af6656e83a9b0347c22328ad6d899760819e5f19fa80aee88b56d1e02/uv-0.4.26-py3-none-linux_armv6l.whl", hash = "sha256:d1ca5183afab454f28573a286811019b3552625af2cd1cd3996049d3bbfdb1ca", size = 13055731 }, - { url = 
"https://files.pythonhosted.org/packages/92/27/2235628adcf468bc6be98b84e509afa54240d359b4705454e7e957a9650d/uv-0.4.26-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:391a6f5e31b212cb72a8f460493bbdf4088e66049666ad064ac8530230031289", size = 13230933 }, - { url = "https://files.pythonhosted.org/packages/36/ce/dd9b312c2230705119d3de910a32bbd32dc500bf147c7a0076a31bdfd153/uv-0.4.26-py3-none-macosx_11_0_arm64.whl", hash = "sha256:acaa25b304db6f1e8064d3280532ecb80a58346e37f4199659269847848c4da0", size = 12266060 }, - { url = "https://files.pythonhosted.org/packages/4d/64/ef6532d84841f5e77e240df9a7dbdc3ca5bf45fae323f247b7bd57bea037/uv-0.4.26-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:2ddb60d508b668b8da055651b30ff56c1efb79d57b064c218a7622b5c74b2af8", size = 12539139 }, - { url = "https://files.pythonhosted.org/packages/1b/30/b4f98f5e28a8c41e370be1a6ef9d48a619e20d3caeb2bf437f1560fab2df/uv-0.4.26-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6f66f11e088d231b7e305f089dc949b0e6b1d65e0a877b50ba5c3ae26e151144", size = 12867987 }, - { url = "https://files.pythonhosted.org/packages/7f/5f/605fe50a0710a78013ad5b2b1034d8f056b5971fc023b6510a24e9350637/uv-0.4.26-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e086ebe200e9718e9622af405d45caad9d84b60824306fcb220335fe6fc90966", size = 13594669 }, - { url = "https://files.pythonhosted.org/packages/ae/4b/e3d02b963f9f83f76d1b0757204a210aceebe8ae16f69fcb431b09bc3926/uv-0.4.26-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:41f9876c22ad5b4518bffe9e50ec7169e242b64f139cdcaf42a76f70a9bd5c78", size = 14156314 }, - { url = "https://files.pythonhosted.org/packages/40/8e/7803d3b76d8694ba939509e49d0c37e70a6d580ef5b7f0242701533920e5/uv-0.4.26-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6091075420eda571b0377d351c393b096514cb036a3199e033e003edaa0ff880", size = 13897243 }, - { url = "https://files.pythonhosted.org/packages/97/ee/8d5b63b590d3cb9dae5ac396cc099dcad2e368794d77e34a52dd896e5d8e/uv-0.4.26-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1214caacc6b9f9c72749634c7a82a5d93123a44b70a1fa6a9d13993c126ca33e", size = 17961411 }, - { url = "https://files.pythonhosted.org/packages/da/9a/5a6a3ea6c2bc42904343897b666cb8c9ac921bf9551b463aeb592cd49d45/uv-0.4.26-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a63a6fe6f249a9fff72328204c3e6b457aae5914590e6881b9b39dcc72d24df", size = 13700388 }, - { url = "https://files.pythonhosted.org/packages/33/52/009ea704318c5d0f290fb2ea4e1874d5625a60b290c6e5e49aae4d140091/uv-0.4.26-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:c4c69532cb4d0c1e160883142b8bf0133a5a67e9aed5148e13743ae55c2dfc03", size = 12702036 }, - { url = "https://files.pythonhosted.org/packages/72/38/4dc590872e5c1810c6ec203d9b070278ed396a1ebf3396e556079946c894/uv-0.4.26-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:9560c2eb234ea92276bbc647854d4a9e75556981c1193c3cc59f6613f7d177f2", size = 12854127 }, - { url = "https://files.pythonhosted.org/packages/76/73/124820b37d1c8784fbebfc4b5b7812b4fa8e4e680c35b77a38be444dac9f/uv-0.4.26-py3-none-musllinux_1_1_i686.whl", hash = "sha256:a41bdd09b9a3ddc8f459c73e924485e1caae43e43305cedb65f5feac05cf184a", size = 13309009 }, - { url = "https://files.pythonhosted.org/packages/f4/e7/37cf24861c6f76ba85ac80c15c391848524668be8dcd218ed04da80a96b6/uv-0.4.26-py3-none-musllinux_1_1_ppc64le.whl", hash = 
"sha256:23cee82020b9e973a5feba81c2cf359a5a09020216d98534926f45ee7b74521d", size = 15079442 }, - { url = "https://files.pythonhosted.org/packages/ca/ac/fa29079ee0c26c65efca5c447ef6ce66f0afca1f73c09d599229d2d9dfd4/uv-0.4.26-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:468f806e841229c0bd6e1cffaaffc064720704623890cee15b42b877cef748c5", size = 13827888 }, - { url = "https://files.pythonhosted.org/packages/40/e8/f9824ecb8b13da5e8b0e9b8fbc81edb9e0d41923ebc6e287ae2e5a04bc62/uv-0.4.26-py3-none-win32.whl", hash = "sha256:70a108399d6c9e3d1f4a0f105d6d016f97f292dbb6c724e1ed2e6dc9f6872c79", size = 13092190 }, - { url = "https://files.pythonhosted.org/packages/46/91/c76682177dbe46dc0cc9221f9483b186ad3d8e0b59056c2cdae5c011609c/uv-0.4.26-py3-none-win_amd64.whl", hash = "sha256:e826b544020ef407387ed734a89850cac011ee4b5daf94b4f616b71eff2c8a94", size = 14757412 }, + { url = "https://files.pythonhosted.org/packages/fa/47/ba9c8dba784f5fe822ce0591e4951504db8294e549ae9b8c0eabcf80372a/uv-0.4.28-py3-none-linux_armv6l.whl", hash = "sha256:524f38d996b51c27d1342af0d4e69c1524fbcfe57c8e036498811a5079fab070", size = 13207872 }, + { url = "https://files.pythonhosted.org/packages/3b/93/d19ab3a55778f97985203e3792ed79f53739d75f818a4792b0de07e29058/uv-0.4.28-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2c8c3a719d68181127fcf90c0e5d2a4b76bb405bf464e04c8bf5c6d356109cec", size = 13281267 }, + { url = "https://files.pythonhosted.org/packages/5a/4f/d672cb7467719b4b348e5380e59ac9dc2962b418f66167814e61f96f00a2/uv-0.4.28-py3-none-macosx_11_0_arm64.whl", hash = "sha256:e44e46aecf42e7d075d3428864c42598b3397fd4cdf5fbf198b38673870ac932", size = 12283215 }, + { url = "https://files.pythonhosted.org/packages/24/15/67755fb799c205f7eb3843f60980d794fc4fefe7596097f96bd911ded348/uv-0.4.28-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:7932026532a8294969777fa500dbd3c3a80aada14ac131d9696d596d31068550", size = 12599223 }, + { url = "https://files.pythonhosted.org/packages/50/69/f6e67de2aea1476953fd412a76d2f634369fcd0cdeb23fb64f4c14d1ccbb/uv-0.4.28-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d9b8543712257678a5ab7e6865486bc71903c231d151ad1aff663b1c25596744", size = 13014207 }, + { url = "https://files.pythonhosted.org/packages/26/55/14f9f2f1f38493107462b2e8055c25ad8e3eea0821475c9888e91abdfdb1/uv-0.4.28-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:274b5af065a1a3a37456e9f1a8c1c4e9b07825be1c4135d299e022fb0547de38", size = 13621543 }, + { url = "https://files.pythonhosted.org/packages/aa/09/4642cbcb8bcd74bb61089eca207c70dfde535c1a9c1933d809458781b7dc/uv-0.4.28-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:09a50416622b5df476be774739d1682db9079b7bc7493346c2085cf11b91706b", size = 14230000 }, + { url = "https://files.pythonhosted.org/packages/d3/bf/76f3a245f7c7ee9058202136f83fa9628f7642bbc95eaa50c974c8ad50f1/uv-0.4.28-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d12b58c945e4805f06b954475642049d97f69796b9a4c5742a6e0a281de0db9c", size = 13964366 }, + { url = "https://files.pythonhosted.org/packages/27/be/59613a92b6fc47b70ab08390f28a429d2aeeee71901e29ab6e069e05cbc9/uv-0.4.28-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ec1bf494dcf30984b5e6e8208d78a8a4e483855c45c3ea2b1d9e7201d8af00f", size = 18007523 }, + { url = 
"https://files.pythonhosted.org/packages/c6/55/1aad99613982f87eb9aacb405018365a256387232a12e00fabe3aff6536b/uv-0.4.28-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f6d4f95ceb4735a4c8f0555dda6761a57c8ee7fc1b6b7d7004d6a25a8aec38", size = 13764938 }, + { url = "https://files.pythonhosted.org/packages/00/24/560fa8d5c81df81d33bd61aa677a0c86502035b09a3332a9218e29ef56e3/uv-0.4.28-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:2e82236e655c5af1905d7ca15c3c96c28a878f2d77a2e4f714d5254baad85b2e", size = 12768270 }, + { url = "https://files.pythonhosted.org/packages/a3/74/8e35216352a8654ab4bfdf6ccd875433c7183ef725bb7947535378f01283/uv-0.4.28-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:a3c59d5a11e0ddf550e20ea10b5d26ed06acab1192d3b70fe3993444cfe8fd41", size = 13004268 }, + { url = "https://files.pythonhosted.org/packages/24/fb/5723f6b60e0089a1c53d638cbd9c73f8132d361eed68966b7a80afb289bd/uv-0.4.28-py3-none-musllinux_1_1_i686.whl", hash = "sha256:be1ce25068d24b42273182729dc1917654438797346a5d470606949ec344fb22", size = 13344079 }, + { url = "https://files.pythonhosted.org/packages/5f/8d/5cf54b786ce69788a00ffcb7e275bacff80dab72653a04fcd0214be03ec4/uv-0.4.28-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:dea9d143e52cc295c9da9840530629196b0dc24c71b31a880f2f979fe3f1d62e", size = 15247117 }, + { url = "https://files.pythonhosted.org/packages/fd/75/b22afa960bf8a74ec66bd35c702ad2552ad93edf3b231e9f29b96d0d519e/uv-0.4.28-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:6ea1fac8b9b8d785f66e2ab46296e6939a43ab85da538d3eea12a27dfefd84a6", size = 13899919 }, + { url = "https://files.pythonhosted.org/packages/56/30/c9691fc0d9bb8b5c7719429c4313591a251961e7e89b79b2e42349f3cf91/uv-0.4.28-py3-none-win32.whl", hash = "sha256:8a32af23fc619e1e70923a498c097ec6eb120e764315ba164fa7ab8a65af9ba3", size = 13124306 }, + { url = "https://files.pythonhosted.org/packages/45/86/7bb87d8b97fcf807e97f256bfe9d55e31ef788778ec9793f2c9a6a5d128a/uv-0.4.28-py3-none-win_amd64.whl", hash = "sha256:e680313c3b25eee9f9f521fab20746292cf6ef4e162e4f973e0758867702384f", size = 14886993 }, ] [[package]] From eb721bd514edcc7dbfc914cb632986c8cdf2ce84 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Mon, 28 Oct 2024 20:16:46 -0700 Subject: [PATCH 15/25] tweak parser imports --- archivebox/parsers/generic_jsonl.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/archivebox/parsers/generic_jsonl.py b/archivebox/parsers/generic_jsonl.py index 3af7356b..3948ba18 100644 --- a/archivebox/parsers/generic_jsonl.py +++ b/archivebox/parsers/generic_jsonl.py @@ -1,14 +1,11 @@ __package__ = 'archivebox.parsers' import json - from typing import IO, Iterable -from ..index.schema import Link -from archivebox.misc.util import ( - enforce_types, -) +from archivebox.misc.util import enforce_types +from ..index.schema import Link from .generic_json import jsonObjectToLink def parse_line(line: str): From 5efeb9d347c81ef359baf4591697f7b39188f4b9 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 29 Oct 2024 00:33:14 -0700 Subject: [PATCH 16/25] add get_SCOPE_CONFIG --- archivebox/abid_utils/models.py | 2 +- archivebox/main.py | 4 +- .../abx_plugin_pocket/config.py | 7 +- .../abx_spec_config/__init__.py | 85 +++++++++++++++++++ 4 files changed, 89 insertions(+), 9 deletions(-) diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py index 6c7cfd0e..f36bfcd9 100644 --- a/archivebox/abid_utils/models.py +++ b/archivebox/abid_utils/models.py @@ -174,7 +174,7 @@ class 
ABIDModel(models.Model): 'uri': self.abid_uri_src, 'subtype': self.abid_subtype_src, 'rand': self.abid_rand_src, - 'salt': 'self.abid_salt', # defined as static class vars at build time + 'salt': 'self.abid_salt', # defined as static class vars at build time } @property diff --git a/archivebox/main.py b/archivebox/main.py index ce6347b2..9ce0b9bd 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -199,15 +199,13 @@ def version(quiet: bool=False, console = Console() prnt = console.print - from django.conf import settings - from abx_plugin_default_binproviders import apt, brew, env from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID from archivebox.config.paths import get_data_locations, get_code_locations - LDAP_ENABLED = archivebox.pm.hook.get_FLAT_CONFIG().LDAP_ENABLED + LDAP_ENABLED = archivebox.pm.hook.get_SCOPE_CONFIG().LDAP_ENABLED # 0.7.1 diff --git a/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py index 2db072a1..31f691b2 100644 --- a/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py +++ b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py @@ -1,15 +1,12 @@ -__package__ = 'abx_plugin_pocket' - from typing import Dict - from pydantic import Field -from abx_spec_config.base_configset import BaseConfigSet +from abx_spec_config import BaseConfigSet class PocketConfig(BaseConfigSet): POCKET_CONSUMER_KEY: str | None = Field(default=None) - POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default=lambda: {}) # {: , ...} + POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default=dict) # {: , ...} POCKET_CONFIG = PocketConfig() diff --git a/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py index 3feaab82..6aeedb71 100644 --- a/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py +++ b/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py @@ -51,6 +51,91 @@ class ConfigPluginSpec: for configset in pm.hook.get_CONFIGS().values() for key, value in benedict(configset).items() }) + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_SCOPE_CONFIG(self, extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) 
-> Dict[ConfigKeyStr, Any]: + """Get the config as it applies to you right now, based on the current context""" + return benedict({ + **pm.hook.get_default_config(default=default), + # **pm.hook.get_machine_config(machine), + **pm.hook.get_environment_config(environment=environment), + **pm.hook.get_collection_config(collection=collection), + **pm.hook.get_user_config(user=user), + **pm.hook.get_crawl_config(crawl=crawl), + **pm.hook.get_snapshot_config(snapshot=snapshot), + **pm.hook.get_archiveresult_config(archiveresult=archiveresult), + # **pm.hook.get_request_config(request=request), + **(extra or {}), + }) + + # @abx.hookspec(firstresult=True) + # @abx.hookimpl + # def get_request_config(self, request) -> dict: + # session = getattr(request, 'session', None) + # return getattr(session, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_archiveresult_config(self, archiveresult) -> Dict[ConfigKeyStr, Any]: + return getattr(archiveresult, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_snapshot_config(self, snapshot) -> Dict[ConfigKeyStr, Any]: + return getattr(snapshot, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_crawl_config(self, crawl) -> Dict[ConfigKeyStr, Any]: + return getattr(crawl, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_user_config(self, user=None) -> Dict[ConfigKeyStr, Any]: + return getattr(user, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_collection_config(self, collection=...) -> Dict[ConfigKeyStr, Any]: + # ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file + # {} = empty dict, override to ignore the collection config + return benedict({ + key: value + for configset in pm.hook.get_CONFIGS().values() + for key, value in configset.from_collection().items() + }) if collection == ... else collection + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_environment_config(self, environment=...) -> Dict[ConfigKeyStr, Any]: + # ... = ellipsis, means automatically get the environment config from the active environment variables + # {} = empty dict, override to ignore the environment config + return benedict({ + key: value + for configset in pm.hook.get_CONFIGS().values() + for key, value in configset.from_environment().items() + }) if environment == ... else environment + + # @abx.hookspec(firstresult=True) + # @abx.hookimpl + # def get_machine_config(self, machine=...) -> dict: + # # ... = ellipsis, means automatically get the machine config from the currently executing machine + # # {} = empty dict, override to ignore the machine config + # if machine == ...: + # machine = Machine.objects.get_current() + # return getattr(machine, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_default_config(self, default=...) -> Dict[ConfigKeyStr, Any]: + # ... = ellipsis, means automatically get the machine config from the currently executing machine + # {} = empty dict, override to ignore the machine config + return benedict({ + key: value + for configset in pm.hook.get_CONFIGS().values() + for key, value in configset.from_defaults().items() + }) if default == ... 
else default # TODO: add read_config_file(), write_config_file() hooks From f56cdd2da50c3f7ebdbb0ffe94576095d13cb7b5 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 29 Oct 2024 14:51:31 -0700 Subject: [PATCH 17/25] add chrome flag to fix long screenshots getting cut off --- .../abx_plugin_chrome/config.py | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py index fb1d9095..2a12f492 100644 --- a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py +++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py @@ -79,15 +79,16 @@ class ChromeConfig(BaseConfigSet): # Chrome Binary CHROME_BINARY: str = Field(default='chrome') CHROME_DEFAULT_ARGS: List[str] = Field(default=[ - '--virtual-time-budget=15000', - '--disable-features=DarkMode', - "--run-all-compositor-stages-before-draw", - "--hide-scrollbars", - "--autoplay-policy=no-user-gesture-required", - "--no-first-run", - "--use-fake-ui-for-media-stream", - "--use-fake-device-for-media-stream", - "--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'", + "--no-first-run", # dont show any first run ui / setup prompts + '--virtual-time-budget=15000', # accellerate any animations on the page by 15s into the future + '--disable-features=DarkMode', # disable dark mode for archiving + "--run-all-compositor-stages-before-draw", # dont draw partially rendered content, wait until everything is ready + "--hide-scrollbars", # hide scrollbars to prevent layout shift / scrollbar visible in screenshots + "--autoplay-policy=no-user-gesture-required", # allow media autoplay without user gesture (e.g. on mobile) + "--use-fake-ui-for-media-stream", # provide fake camera if site tries to request camera access + "--use-fake-device-for-media-stream", # provide fake camera if site tries to request camera access + "--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'", # ignore chrome updates + "--force-gpu-mem-available-mb=4096", # allows for longer full page screenshots https://github.com/puppeteer/puppeteer/issues/5530 ]) CHROME_EXTRA_ARGS: List[str] = Field(default=[]) @@ -190,6 +191,7 @@ class ChromeConfig(BaseConfigSet): cmd_args.append('--user-data-dir={}'.format(options.CHROME_USER_DATA_DIR)) cmd_args.append('--profile-directory={}'.format(options.CHROME_PROFILE_NAME or 'Default')) + # if CHROME_USER_DATA_DIR is set but folder is empty, create a new profile inside it if not os.path.isfile(options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME / 'Preferences'): STDERR.print(f'[green] + creating new Chrome profile in: {pretty_path(options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME)}[/green]') cmd_args.remove('--no-first-run') From 9c2eac4e47847aa9dbea08e2700824696d791cd0 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 31 Oct 2024 04:24:06 -0700 Subject: [PATCH 18/25] add new actors and orchestrators --- archivebox/actors/__init__.py | 0 archivebox/actors/actor.py | 144 ++++++++++++++++ archivebox/actors/admin.py | 3 + archivebox/actors/apps.py | 6 + archivebox/actors/migrations/__init__.py | 0 archivebox/actors/models.py | 3 + archivebox/actors/orchestrator.py | 207 +++++++++++++++++++++++ archivebox/actors/tests.py | 3 + archivebox/actors/views.py | 3 + 9 files changed, 369 insertions(+) create mode 100644 archivebox/actors/__init__.py create mode 100644 archivebox/actors/actor.py create mode 100644 archivebox/actors/admin.py create mode 100644 
archivebox/actors/apps.py create mode 100644 archivebox/actors/migrations/__init__.py create mode 100644 archivebox/actors/models.py create mode 100644 archivebox/actors/orchestrator.py create mode 100644 archivebox/actors/tests.py create mode 100644 archivebox/actors/views.py diff --git a/archivebox/actors/__init__.py b/archivebox/actors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/actors/actor.py b/archivebox/actors/actor.py new file mode 100644 index 00000000..6e9d523b --- /dev/null +++ b/archivebox/actors/actor.py @@ -0,0 +1,144 @@ +__package__ = 'archivebox.actors' + +import os +import time +import psutil +from typing import ClassVar, Generic, TypeVar, Any, cast, Literal + +from django.db.models import QuerySet +from multiprocessing import Process, cpu_count +from threading import Thread, get_native_id + +# from archivebox.logging_util import TimedProgress + +ALL_SPAWNED_ACTORS: list[psutil.Process] = [] + + +LaunchKwargs = dict[str, Any] + +ObjectType = TypeVar('ObjectType') + +class ActorType(Generic[ObjectType]): + pid: int + + MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.7)), 8) # min 2, max 8 + MAX_TICK_TIME: ClassVar[int] = 60 + + def __init__(self, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs): + self.mode = mode + self.launch_kwargs = launch_kwargs + + @classmethod + def get_running_actors(cls) -> list[int]: + # returns a list of pids of all running actors of this type + return [ + proc.pid for proc in ALL_SPAWNED_ACTORS + if proc.is_running() and proc.status() != 'zombie' + ] + + @classmethod + def spawn_actor(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int: + actor = cls(mode=mode, **launch_kwargs) + # bg_actor_proccess = Process(target=actor.runloop) + if mode == 'thread': + bg_actor_thread = Thread(target=actor.runloop) + bg_actor_thread.start() + assert bg_actor_thread.native_id is not None + return bg_actor_thread.native_id + else: + bg_actor_process = Process(target=actor.runloop) + bg_actor_process.start() + assert bg_actor_process.pid is not None + ALL_SPAWNED_ACTORS.append(psutil.Process(pid=bg_actor_process.pid)) + return bg_actor_process.pid + + @classmethod + def get_queue(cls) -> QuerySet: + # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot')) + raise NotImplementedError + + @classmethod + def get_next(cls) -> ObjectType | None: + return cls.get_queue().last() + + @classmethod + def get_actors_to_spawn(cls, queue, running_actors) -> list[LaunchKwargs]: + actors_to_spawn: list[LaunchKwargs] = [] + max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors) + queue_length = queue.count() + + if not queue_length: # queue is empty, spawn 0 actors + return actors_to_spawn + elif queue_length > 10: # queue is long, spawn as many as possible + actors_to_spawn += max_spawnable * [{}] + elif queue_length > 5: # queue is medium, spawn 1 or 2 actors + actors_to_spawn += min(2, max_spawnable) * [{}] + else: # queue is short, spawn 1 actor + actors_to_spawn += min(1, max_spawnable) * [{}] + return actors_to_spawn + + def on_startup(self): + if self.mode == 'thread': + self.pid = get_native_id() + else: + self.pid = os.getpid() + print('Actor on_startup()', f'pid={self.pid}') + # abx.pm.hook.on_actor_startup(self) + + def on_shutdown(self): + print('Actor on_shutdown()', f'pid={self.pid}') + # abx.pm.hook.on_actor_shutdown(self) + + def runloop(self): + self.on_startup() + + rechecks = 30 + + 
while True: + obj_to_process: ObjectType | None = None + try: + obj_to_process = cast(ObjectType, self.get_next()) + except Exception: + pass + + if obj_to_process: + rechecks = 30 + else: + if rechecks == 0: + break # stop looping and exit if queue is empty + else: + # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...') + rechecks -= 1 + time.sleep(1) + continue + + if not self.lock(obj_to_process): + continue + + # abx.pm.hook.on_actor_tick_start(self, obj_to_process) + try: + # timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + + # run the tick function on the object + self.tick(obj_to_process) + except Exception as err: + # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) + print('ERROR: actor tick failed', err) + # refresh the db connection + from django import db + db.connections.close_all() + finally: + # timer.end() + pass + # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + + self.on_shutdown() + + def tick(self, obj: ObjectType) -> None: + print('Actor Processing tick()', obj) + + def lock(self, obj: ObjectType) -> bool: + print('Actor lock()', obj) + return True + + diff --git a/archivebox/actors/admin.py b/archivebox/actors/admin.py new file mode 100644 index 00000000..8c38f3f3 --- /dev/null +++ b/archivebox/actors/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/archivebox/actors/apps.py b/archivebox/actors/apps.py new file mode 100644 index 00000000..2347ac3f --- /dev/null +++ b/archivebox/actors/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class ActorsConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "actors" diff --git a/archivebox/actors/migrations/__init__.py b/archivebox/actors/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/actors/models.py b/archivebox/actors/models.py new file mode 100644 index 00000000..71a83623 --- /dev/null +++ b/archivebox/actors/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. diff --git a/archivebox/actors/orchestrator.py b/archivebox/actors/orchestrator.py new file mode 100644 index 00000000..5a404850 --- /dev/null +++ b/archivebox/actors/orchestrator.py @@ -0,0 +1,207 @@ +__package__ = 'archivebox.actors' + +import os +import time +from typing import Dict + +from multiprocessing import Process + +from django.db.models import QuerySet + +from .actor import ActorType + +class Orchestrator: + pid: int + + @classmethod + def spawn_orchestrator(cls) -> int: + orchestrator = cls() + orchestrator_bg_proc = Process(target=orchestrator.runloop) + orchestrator_bg_proc.start() + assert orchestrator_bg_proc.pid is not None + return orchestrator_bg_proc.pid + + @classmethod + def get_all_actor_types(cls) -> Dict[str, ActorType]: + # returns a Dict of all discovered {actor_type_id: ActorType} ... 
+ # return {'Snapshot': SnapshotActorType, 'ArchiveResult_chrome': ChromeActorType, ...} + return { + 'TestActor': TestActor(), + } + + @classmethod + def get_orphaned_objects(cls, all_queues) -> list: + # returns a list of objects that are in the queues of all actor types but not in the queues of any other actor types + return [] + + def on_startup(self): + self.pid = os.getpid() + print('Orchestrator startup', self.pid) + # abx.pm.hook.on_orchestrator_startup(self) + + def on_shutdown(self, err: BaseException | None = None): + print('Orchestrator shutdown', self.pid, err) + # abx.pm.hook.on_orchestrator_shutdown(self) + + def on_tick_started(self, actor_types, all_queues): + total_pending = sum(queue.count() for queue in all_queues.values()) + print('Orchestrator tick +', self.pid, f'total_pending={total_pending}') + # abx.pm.hook.on_orchestrator_tick_started(self, actor_types, all_queues) + + def on_tick_finished(self, actor_types, all_queues): + # print('Orchestrator tick √', self.pid) + # abx.pm.hook.on_orchestrator_tick_finished(self, actor_types, all_queues) + pass + + def on_idle(self): + # print('Orchestrator idle', self.pid) + # abx.pm.hook.on_orchestrator_idle(self) + pass + + def runloop(self): + self.pid = os.getpid() + + try: + while True: + actor_types = self.get_all_actor_types() + all_queues = { + actor_type: actor_type.get_queue() + for actor_type in actor_types.values() + } + self.on_tick_started(actor_types, all_queues) + + all_existing_actors = [] + all_spawned_actors = [] + + for actor_type, queue in all_queues.items(): + existing_actors = actor_type.get_running_actors() + all_existing_actors.extend(existing_actors) + actors_to_spawn = actor_type.get_actors_to_spawn(queue, existing_actors) + for launch_kwargs in actors_to_spawn: + all_spawned_actors.append(actor_type.spawn_actor(**launch_kwargs)) + + if all_spawned_actors: + print(f'Found {len(all_existing_actors)} existing actors, Spawned {len(all_spawned_actors)} new actors') + else: + # print(f'No actors to spawn, currently_running: {len(all_existing_actors)}') + time.sleep(1) + + orphaned_objects = self.get_orphaned_objects(all_queues) + if orphaned_objects: + print('WARNING: some objects may will not be processed', orphaned_objects) + + if not any(queue.exists() for queue in all_queues.values()): + # we are idle + self.on_idle() + # time.sleep(0.250) + time.sleep(2) + + self.on_tick_finished(actor_types, all_queues) + + except (KeyboardInterrupt, SystemExit) as err: + self.on_shutdown(err) + + + +from archivebox.config.django import setup_django + +setup_django() + +from core.models import ArchiveResult, Snapshot + +from django.utils import timezone + +from django import db +from django.db import connection + +def get_next_archiveresult_atomically() -> ArchiveResult | None: + with connection.cursor() as cursor: + # select a random archiveresult out of the next 50 pending ones + # (to avoid clashing with another actor thats also selecting from the same list) + cursor.execute(""" + UPDATE core_archiveresult + SET status = 'started' + WHERE status = 'failed' and id = ( + SELECT id FROM ( + SELECT id FROM core_archiveresult + WHERE status = 'failed' + ORDER BY start_ts DESC + LIMIT 50 + ) candidates + ORDER BY RANDOM() + LIMIT 1 + ) + RETURNING *; + """) + result = cursor.fetchone() + + # If no rows were updated, return None + if result is None: + return None + + # Convert the row tuple into a dict matching column names + columns = [col[0] for col in cursor.description] + return ArchiveResult(**dict(zip(columns, 
result))) + + +class TestActor(ActorType[ArchiveResult]): + @classmethod + def get_queue(cls) -> QuerySet[ArchiveResult]: + return ArchiveResult.objects.filter(status='failed', extractor='favicon') + + @classmethod + def get_next(cls) -> ArchiveResult | None: + return get_next_archiveresult_atomically() + # return cls.get_queue().last() + + def tick(self, obj: ArchiveResult): + # print(f'TestActor[{self.pid}] tick({obj.id})', 'remaining:', self.get_queue().count()) + updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='success') + if not updated: + raise Exception('Failed to update object status, likely being processed by another actor') + + def lock(self, obj: ArchiveResult) -> bool: + locked = True + # locked = ArchiveResult.objects.select_for_update(skip_locked=True).filter(id=obj.id, status='pending').update(status='started') == 1 + # if locked: + # print(f'TestActor[{self.pid}] lock({obj.id}) πŸ”’') + # else: + # print(f'TestActor[{self.pid}] lock({obj.id}) X') + return locked + +if __name__ == '__main__': + snap = Snapshot.objects.last() + assert snap is not None + + orchestrator = Orchestrator() + orchestrator.spawn_orchestrator() + + for _ in range(50_000): + try: + ar = ArchiveResult.objects.create( + snapshot=snap, + status='failed', + extractor='favicon', + cmd=['echo', '"hello"'], + cmd_version='1.0', + pwd='.', + start_ts=timezone.now(), + end_ts=timezone.now(), + ) + except Exception as err: + print(err) + db.connections.close_all() + if _ % 1000 == 0: + print('Created', _, 'snapshots...') + time.sleep(0.001) + # time.sleep(3) + + # test_queue = TestActor.get_queue() + # thread_actors = [] + # print('Actor queue:', test_queue) + # actors_to_spawn = TestActor.get_actors_to_spawn(test_queue, thread_actors) + # print('Actors to spawn:', actors_to_spawn) + # # thread_actors = [TestActor.spawn_actor(mode='thread') for _ in actors_to_spawn] + # # print('Thread Actors spawned:', thread_actors) + # process_actors = [TestActor.spawn_actor(mode='process') for _ in actors_to_spawn] + # print('Process Actors spawned:', process_actors) diff --git a/archivebox/actors/tests.py b/archivebox/actors/tests.py new file mode 100644 index 00000000..7ce503c2 --- /dev/null +++ b/archivebox/actors/tests.py @@ -0,0 +1,3 @@ +from django.test import TestCase + +# Create your tests here. diff --git a/archivebox/actors/views.py b/archivebox/actors/views.py new file mode 100644 index 00000000..91ea44a2 --- /dev/null +++ b/archivebox/actors/views.py @@ -0,0 +1,3 @@ +from django.shortcuts import render + +# Create your views here. 
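The patch above introduces the actor/orchestrator pattern (refined further in the next patch): each ActorType owns a queryset-based queue, the orchestrator polls every queue and forks enough actor processes or threads to drain it, and each spawned actor claims one object at a time and runs tick() on it. Below is a minimal sketch of what a concrete actor for another queue could look like against the patch-18 API; the FaviconActor name, its status values, and the work done in tick() are hypothetical illustrations, not part of this series.

    # sketch only: assumes Django settings are configured first
    # (orchestrator.py calls setup_django() when imported)
    from django.db.models import QuerySet

    from core.models import ArchiveResult
    from actors.actor import ActorType
    from actors.orchestrator import Orchestrator


    class FaviconActor(ActorType[ArchiveResult]):
        @classmethod
        def get_queue(cls) -> QuerySet[ArchiveResult]:
            # the queue is just a queryset of objects still waiting to be processed
            return ArchiveResult.objects.filter(status='queued', extractor='favicon')

        def tick(self, obj: ArchiveResult) -> None:
            # do the actual work for one claimed object, then mark it done
            obj.status = 'success'
            obj.save(update_fields=['status'])


    # spawn one worker directly, or start an orchestrator and let it top up
    # workers for each queue based on get_actors_to_spawn()
    FaviconActor.spawn_actor(mode='process')
    Orchestrator.spawn_orchestrator()

Note that in this patch Orchestrator.get_all_actor_types() is still hard-coded to return only TestActor, so a new actor type like the sketch above would also need to be registered there before the orchestrator would spawn it.
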
From 17faa5a507ffb5ac1068fa38d9a38fc56de9c5b9 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 31 Oct 2024 07:10:43 -0700 Subject: [PATCH 19/25] improvements to new actor and orchestrators --- archivebox/actors/actor.py | 243 ++++++++++++++++++---------- archivebox/actors/orchestrator.py | 252 ++++++++++++++++-------------- 2 files changed, 298 insertions(+), 197 deletions(-) diff --git a/archivebox/actors/actor.py b/archivebox/actors/actor.py index 6e9d523b..97316405 100644 --- a/archivebox/actors/actor.py +++ b/archivebox/actors/actor.py @@ -3,8 +3,12 @@ __package__ = 'archivebox.actors' import os import time import psutil -from typing import ClassVar, Generic, TypeVar, Any, cast, Literal +from typing import ClassVar, Generic, TypeVar, Any, cast, Literal, Type +from rich import print + +from django import db +from django.db import models from django.db.models import QuerySet from multiprocessing import Process, cpu_count from threading import Thread, get_native_id @@ -16,129 +20,210 @@ ALL_SPAWNED_ACTORS: list[psutil.Process] = [] LaunchKwargs = dict[str, Any] -ObjectType = TypeVar('ObjectType') +ModelType = TypeVar('ModelType', bound=models.Model) -class ActorType(Generic[ObjectType]): +class ActorType(Generic[ModelType]): pid: int + idle_count: int = 0 + launch_kwargs: LaunchKwargs = {} - MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.7)), 8) # min 2, max 8 + # model_type: Type[ModelType] + MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8 MAX_TICK_TIME: ClassVar[int] = 60 def __init__(self, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs): self.mode = mode - self.launch_kwargs = launch_kwargs + self.launch_kwargs = launch_kwargs or dict(self.launch_kwargs) + + def __repr__(self) -> str: + label = 'pid' if self.mode == 'process' else 'tid' + return f'[underline]{self.__class__.__name__}[/underline]\\[{label}={self.pid}]' + + def __str__(self) -> str: + return self.__repr__() @classmethod def get_running_actors(cls) -> list[int]: - # returns a list of pids of all running actors of this type + """returns a list of pids of all running actors of this type""" + # WARNING: only works for process actors, not thread actors return [ proc.pid for proc in ALL_SPAWNED_ACTORS if proc.is_running() and proc.status() != 'zombie' ] + + @classmethod + def fork_actor_as_thread(cls, **launch_kwargs: LaunchKwargs) -> int: + actor = cls(mode='thread', **launch_kwargs) + bg_actor_thread = Thread(target=actor.runloop) + bg_actor_thread.start() + assert bg_actor_thread.native_id is not None + return bg_actor_thread.native_id @classmethod - def spawn_actor(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int: - actor = cls(mode=mode, **launch_kwargs) - # bg_actor_proccess = Process(target=actor.runloop) + def fork_actor_as_process(cls, **launch_kwargs: LaunchKwargs) -> int: + actor = cls(mode='process', **launch_kwargs) + bg_actor_process = Process(target=actor.runloop) + bg_actor_process.start() + assert bg_actor_process.pid is not None + ALL_SPAWNED_ACTORS.append(psutil.Process(pid=bg_actor_process.pid)) + return bg_actor_process.pid + + @classmethod + def start(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int: if mode == 'thread': - bg_actor_thread = Thread(target=actor.runloop) - bg_actor_thread.start() - assert bg_actor_thread.native_id is not None - return bg_actor_thread.native_id - else: - bg_actor_process = 
Process(target=actor.runloop) - bg_actor_process.start() - assert bg_actor_process.pid is not None - ALL_SPAWNED_ACTORS.append(psutil.Process(pid=bg_actor_process.pid)) - return bg_actor_process.pid + return cls.fork_actor_as_thread(**launch_kwargs) + elif mode == 'process': + return cls.fork_actor_as_process(**launch_kwargs) + raise ValueError(f'Invalid actor mode: {mode}') @classmethod def get_queue(cls) -> QuerySet: + """override this to provide your queryset as the queue""" # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot')) raise NotImplementedError @classmethod - def get_next(cls) -> ObjectType | None: + def get_next(cls, atomic: bool=True) -> ModelType | None: + if atomic: + return cls.get_next_atomic(model=cls.get_queue().model) return cls.get_queue().last() + @classmethod + def get_next_atomic(cls, model: Type, filter=('status', 'queued'), update=('status', 'started'), sort='created_at', order='DESC', choose_from_top=50) -> ModelType | None: + """ + atomically claim a random object from the top n=50 objects in the queue by updating status=queued->started + optimized for minimizing contention on the queue with other actors selecting from the same list + """ + app_label = model._meta.app_label + model_name = model._meta.model_name + + with db.connection.cursor() as cursor: + # subquery gets the pool of the top 50 candidates sorted by sort and order + # main query selects a random one from that pool + cursor.execute(f""" + UPDATE {app_label}_{model_name} + SET {update[0]} = '{update[1]}' + WHERE {filter[0]} = '{filter[1]}' and id = ( + SELECT id FROM ( + SELECT id FROM {app_label}_{model_name} + WHERE {filter[0]} = '{filter[1]}' + ORDER BY {sort} {order} + LIMIT {choose_from_top} + ) candidates + ORDER BY RANDOM() + LIMIT 1 + ) + RETURNING *; + """) + result = cursor.fetchone() + + # If no rows were claimed, return None + if result is None: + return None + + # reconstruct model instance from the row tuple + columns = [col[0] for col in cursor.description] + return model(**dict(zip(columns, result))) + @classmethod def get_actors_to_spawn(cls, queue, running_actors) -> list[LaunchKwargs]: + """Get a list of launch kwargs for the number of actors to spawn based on the queue and currently running actors""" actors_to_spawn: list[LaunchKwargs] = [] max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors) queue_length = queue.count() - if not queue_length: # queue is empty, spawn 0 actors + # spawning new actors is expensive, avoid spawning all the actors at once. 
To stagger them, + # let the next orchestrator tick handle starting another 2 on the next tick() + # if queue_length > 10: # queue is long, spawn as many as possible + # actors_to_spawn += max_spawnable * [{}] + + if not queue_length: # queue is empty, spawn 0 actors return actors_to_spawn - elif queue_length > 10: # queue is long, spawn as many as possible - actors_to_spawn += max_spawnable * [{}] - elif queue_length > 5: # queue is medium, spawn 1 or 2 actors - actors_to_spawn += min(2, max_spawnable) * [{}] - else: # queue is short, spawn 1 actor - actors_to_spawn += min(1, max_spawnable) * [{}] + elif queue_length > 4: # queue is medium, spawn 1 or 2 actors + actors_to_spawn += min(2, max_spawnable) * [{**cls.launch_kwargs}] + else: # queue is short, spawn 1 actor + actors_to_spawn += min(1, max_spawnable) * [{**cls.launch_kwargs}] return actors_to_spawn - + def on_startup(self): if self.mode == 'thread': - self.pid = get_native_id() + self.pid = get_native_id() # thread id + print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (THREAD)[/green]') else: - self.pid = os.getpid() - print('Actor on_startup()', f'pid={self.pid}') + self.pid = os.getpid() # process id + print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (PROCESS)[/green]') # abx.pm.hook.on_actor_startup(self) - def on_shutdown(self): - print('Actor on_shutdown()', f'pid={self.pid}') + def on_shutdown(self, err: BaseException | None=None): + print(f'[grey53]πŸƒβ€β™‚οΈ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') # abx.pm.hook.on_actor_shutdown(self) + + def on_tick_start(self, obj: ModelType): + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', getattr(obj, 'abid', obj.id)) + # abx.pm.hook.on_actor_tick_start(self, obj_to_process) + # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + pass + + def on_tick_end(self, obj: ModelType): + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', getattr(obj, 'abid', obj.id)) + # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + # self.timer.end() + pass + + def on_tick_exception(self, obj: ModelType, err: BaseException): + print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', getattr(obj, 'abid', obj.id), err) + # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) def runloop(self): self.on_startup() - - rechecks = 30 - - while True: - obj_to_process: ObjectType | None = None - try: - obj_to_process = cast(ObjectType, self.get_next()) - except Exception: - pass - - if obj_to_process: - rechecks = 30 - else: - if rechecks == 0: - break # stop looping and exit if queue is empty - else: - # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...') - rechecks -= 1 - time.sleep(1) - continue - - if not self.lock(obj_to_process): - continue - - # abx.pm.hook.on_actor_tick_start(self, obj_to_process) - try: - # timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + try: + while True: + obj_to_process: ModelType | None = None + try: + obj_to_process = cast(ModelType, self.get_next()) + except Exception: + pass - # run the tick function on the object - self.tick(obj_to_process) - except Exception as err: - # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) - print('ERROR: actor tick failed', err) - # refresh the db connection - from django import db - db.connections.close_all() - finally: - # timer.end() - pass - # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + if obj_to_process: + self.idle_count = 0 + else: + if self.idle_count >= 30: + break # stop looping and exit if queue is 
empty and we have rechecked it 30 times + else: + # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...') + self.idle_count += 1 + time.sleep(1) + continue + + if not self.lock(obj_to_process): + # we are unable to lock the object, some other actor got it first. skip it and get the next object + continue + + self.on_tick_start(obj_to_process) + + try: + # run the tick function on the object + self.tick(obj_to_process) + except Exception as err: + print(f'[red]πŸƒβ€β™‚οΈ ERROR: {self}.tick()[/red]', err) + db.connections.close_all() + self.on_tick_exception(obj_to_process, err) + finally: + self.on_tick_end(obj_to_process) + + self.on_shutdown(err=None) + except BaseException as err: + if isinstance(err, KeyboardInterrupt): + print() + else: + print(f'\n[red]πŸƒβ€β™‚οΈ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err) + self.on_shutdown(err=err) + + def tick(self, obj: ModelType) -> None: + print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', getattr(obj, 'abid', obj.id)) - self.on_shutdown() - - def tick(self, obj: ObjectType) -> None: - print('Actor Processing tick()', obj) - - def lock(self, obj: ObjectType) -> bool: - print('Actor lock()', obj) + def lock(self, obj: ModelType) -> bool: + print(f'[blue]πŸƒβ€β™‚οΈ {self}.lock()[/blue]', getattr(obj, 'abid', obj.id)) return True diff --git a/archivebox/actors/orchestrator.py b/archivebox/actors/orchestrator.py index 5a404850..1ca90148 100644 --- a/archivebox/actors/orchestrator.py +++ b/archivebox/actors/orchestrator.py @@ -2,104 +2,132 @@ __package__ = 'archivebox.actors' import os import time -from typing import Dict +import itertools +import uuid +from typing import Dict, Type -from multiprocessing import Process +from multiprocessing import Process, cpu_count + +from rich import print from django.db.models import QuerySet +from django.apps import apps from .actor import ActorType class Orchestrator: pid: int + idle_count: int = 0 + actor_types: Dict[str, Type[ActorType]] - @classmethod - def spawn_orchestrator(cls) -> int: - orchestrator = cls() - orchestrator_bg_proc = Process(target=orchestrator.runloop) + def __init__(self, actor_types: Dict[str, Type[ActorType]] | None = None): + self.actor_types = actor_types or self.actor_types or self.autodiscover_actor_types() + + def __repr__(self) -> str: + return f'[underline]{self.__class__.__name__}[/underline]\\[pid={self.pid}]' + + def __str__(self) -> str: + return self.__repr__() + + def start(self) -> int: + orchestrator_bg_proc = Process(target=self.runloop) orchestrator_bg_proc.start() assert orchestrator_bg_proc.pid is not None return orchestrator_bg_proc.pid @classmethod - def get_all_actor_types(cls) -> Dict[str, ActorType]: - # returns a Dict of all discovered {actor_type_id: ActorType} ... + def autodiscover_actor_types(cls) -> Dict[str, Type[ActorType]]: + # returns a Dict of all discovered {actor_type_id: ActorType} across the codebase + # override this method in a subclass to customize the actor types that are used # return {'Snapshot': SnapshotActorType, 'ArchiveResult_chrome': ChromeActorType, ...} return { - 'TestActor': TestActor(), + # look through all models and find all classes that inherit from ActorType + # ... 
} @classmethod def get_orphaned_objects(cls, all_queues) -> list: # returns a list of objects that are in the queues of all actor types but not in the queues of any other actor types - return [] + all_queued_ids = itertools.chain(*[queue.values('id', flat=True) for queue in all_queues.values()]) + orphaned_objects = [] + for model in apps.get_models(): + if hasattr(model, 'retry_at'): + orphaned_objects.extend(model.objects.filter(retry_at__lt=timezone.now()).exclude(id__in=all_queued_ids)) + return orphaned_objects def on_startup(self): self.pid = os.getpid() - print('Orchestrator startup', self.pid) + print(f'[green]πŸ‘¨β€βœˆοΈ {self}.on_startup() STARTUP (PROCESS)[/green]') # abx.pm.hook.on_orchestrator_startup(self) def on_shutdown(self, err: BaseException | None = None): - print('Orchestrator shutdown', self.pid, err) + print(f'[grey53]πŸ‘¨β€βœˆοΈ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') # abx.pm.hook.on_orchestrator_shutdown(self) - def on_tick_started(self, actor_types, all_queues): - total_pending = sum(queue.count() for queue in all_queues.values()) - print('Orchestrator tick +', self.pid, f'total_pending={total_pending}') + def on_tick_started(self, all_queues): + # total_pending = sum(queue.count() for queue in all_queues.values()) + # print(f'πŸ‘¨β€βœˆοΈ {self}.on_tick_started()', f'total_pending={total_pending}') # abx.pm.hook.on_orchestrator_tick_started(self, actor_types, all_queues) + pass - def on_tick_finished(self, actor_types, all_queues): - # print('Orchestrator tick √', self.pid) + def on_tick_finished(self, all_queues, all_existing_actors, all_spawned_actors): + if all_spawned_actors: + total_queue_length = sum(queue.count() for queue in all_queues.values()) + print(f'[grey53]πŸ‘¨β€βœˆοΈ {self}.on_tick_finished() queue={total_queue_length} existing_actors={len(all_existing_actors)} spawned_actors={len(all_spawned_actors)}[/grey53]') # abx.pm.hook.on_orchestrator_tick_finished(self, actor_types, all_queues) - pass - - def on_idle(self): - # print('Orchestrator idle', self.pid) + + def on_idle(self, all_queues): + # print(f'πŸ‘¨β€βœˆοΈ {self}.on_idle()') # abx.pm.hook.on_orchestrator_idle(self) - pass - + # check for orphaned objects left behind + if self.idle_count == 60: + orphaned_objects = self.get_orphaned_objects(all_queues) + if orphaned_objects: + print('[red]πŸ‘¨β€βœˆοΈ WARNING: some objects may not be processed, no actor has claimed them after 60s:[/red]', orphaned_objects) + def runloop(self): - self.pid = os.getpid() - + self.on_startup() try: while True: - actor_types = self.get_all_actor_types() all_queues = { actor_type: actor_type.get_queue() - for actor_type in actor_types.values() + for actor_type in self.actor_types.values() } - self.on_tick_started(actor_types, all_queues) + if not all_queues: + raise Exception('Failed to find any actor_types to process') + + self.on_tick_started(all_queues) all_existing_actors = [] all_spawned_actors = [] for actor_type, queue in all_queues.items(): - existing_actors = actor_type.get_running_actors() - all_existing_actors.extend(existing_actors) - actors_to_spawn = actor_type.get_actors_to_spawn(queue, existing_actors) - for launch_kwargs in actors_to_spawn: - all_spawned_actors.append(actor_type.spawn_actor(**launch_kwargs)) - - if all_spawned_actors: - print(f'Found {len(all_existing_actors)} existing actors, Spawned {len(all_spawned_actors)} new actors') - else: - # print(f'No actors to spawn, currently_running: {len(all_existing_actors)}') - time.sleep(1) - 
- orphaned_objects = self.get_orphaned_objects(all_queues) - if orphaned_objects: - print('WARNING: some objects may will not be processed', orphaned_objects) + try: + existing_actors = actor_type.get_running_actors() + all_existing_actors.extend(existing_actors) + actors_to_spawn = actor_type.get_actors_to_spawn(queue, existing_actors) + for launch_kwargs in actors_to_spawn: + new_actor_pid = actor_type.start(mode='process', **launch_kwargs) + all_spawned_actors.append(new_actor_pid) + except BaseException as err: + print(f'πŸƒβ€β™‚οΈ ERROR: {self} Failed to get {actor_type} queue & running actors', err) if not any(queue.exists() for queue in all_queues.values()): - # we are idle - self.on_idle() - # time.sleep(0.250) - time.sleep(2) + self.on_idle(all_queues) + self.idle_count += 1 + time.sleep(1) + else: + self.idle_count = 0 - self.on_tick_finished(actor_types, all_queues) + self.on_tick_finished(all_queues, all_existing_actors, all_spawned_actors) + time.sleep(1) - except (KeyboardInterrupt, SystemExit) as err: - self.on_shutdown(err) + except BaseException as err: + if isinstance(err, KeyboardInterrupt): + print() + else: + print(f'\n[red]πŸƒβ€β™‚οΈ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err) + self.on_shutdown(err=err) @@ -114,94 +142,82 @@ from django.utils import timezone from django import db from django.db import connection -def get_next_archiveresult_atomically() -> ArchiveResult | None: - with connection.cursor() as cursor: - # select a random archiveresult out of the next 50 pending ones - # (to avoid clashing with another actor thats also selecting from the same list) - cursor.execute(""" - UPDATE core_archiveresult - SET status = 'started' - WHERE status = 'failed' and id = ( - SELECT id FROM ( - SELECT id FROM core_archiveresult - WHERE status = 'failed' - ORDER BY start_ts DESC - LIMIT 50 - ) candidates - ORDER BY RANDOM() - LIMIT 1 - ) - RETURNING *; - """) - result = cursor.fetchone() - - # If no rows were updated, return None - if result is None: - return None - - # Convert the row tuple into a dict matching column names - columns = [col[0] for col in cursor.description] - return ArchiveResult(**dict(zip(columns, result))) -class TestActor(ActorType[ArchiveResult]): + +class FaviconActor(ActorType[ArchiveResult]): @classmethod def get_queue(cls) -> QuerySet[ArchiveResult]: return ArchiveResult.objects.filter(status='failed', extractor='favicon') @classmethod def get_next(cls) -> ArchiveResult | None: - return get_next_archiveresult_atomically() - # return cls.get_queue().last() + return cls.get_next_atomic( + model=ArchiveResult, + filter=('status', 'failed'), + update=('status', 'started'), + sort='created_at', + order='DESC', + choose_from_top=cpu_count() * 10 + ) def tick(self, obj: ArchiveResult): - # print(f'TestActor[{self.pid}] tick({obj.id})', 'remaining:', self.get_queue().count()) - updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='success') + print(f'[grey53]{self}.tick({obj.id}) remaining:[/grey53]', self.get_queue().count()) + updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='success') == 1 if not updated: - raise Exception('Failed to update object status, likely being processed by another actor') + raise Exception(f'Failed to update {obj.abid}, interrupted by another actor writing to the same object') def lock(self, obj: ArchiveResult) -> bool: - locked = True + """As an alternative to self.get_next_atomic(), we can use select_for_update() or manually update a 
semaphore field here""" + # locked = ArchiveResult.objects.select_for_update(skip_locked=True).filter(id=obj.id, status='pending').update(status='started') == 1 # if locked: - # print(f'TestActor[{self.pid}] lock({obj.id}) πŸ”’') + # print(f'FaviconActor[{self.pid}] lock({obj.id}) πŸ”’') # else: - # print(f'TestActor[{self.pid}] lock({obj.id}) X') - return locked - + # print(f'FaviconActor[{self.pid}] lock({obj.id}) X') + return True + + +class ExtractorsOrchestrator(Orchestrator): + actor_types = { + 'FaviconActor': FaviconActor, + } + + if __name__ == '__main__': + orchestrator = ExtractorsOrchestrator() + orchestrator.start() + snap = Snapshot.objects.last() assert snap is not None - - orchestrator = Orchestrator() - orchestrator.spawn_orchestrator() - - for _ in range(50_000): + created = 0 + while True: + time.sleep(0.005) try: - ar = ArchiveResult.objects.create( - snapshot=snap, - status='failed', - extractor='favicon', - cmd=['echo', '"hello"'], - cmd_version='1.0', - pwd='.', - start_ts=timezone.now(), - end_ts=timezone.now(), - ) + ArchiveResult.objects.bulk_create([ + ArchiveResult( + id=uuid.uuid4(), + snapshot=snap, + status='failed', + extractor='favicon', + cmd=['echo', '"hello"'], + cmd_version='1.0', + pwd='.', + start_ts=timezone.now(), + end_ts=timezone.now(), + created_at=timezone.now(), + modified_at=timezone.now(), + created_by_id=1, + ) + for _ in range(100) + ]) + created += 100 + if created % 1000 == 0: + print(f'[blue]Created {created} ArchiveResults...[/blue]') + time.sleep(25) except Exception as err: print(err) db.connections.close_all() - if _ % 1000 == 0: - print('Created', _, 'snapshots...') - time.sleep(0.001) - # time.sleep(3) - - # test_queue = TestActor.get_queue() - # thread_actors = [] - # print('Actor queue:', test_queue) - # actors_to_spawn = TestActor.get_actors_to_spawn(test_queue, thread_actors) - # print('Actors to spawn:', actors_to_spawn) - # # thread_actors = [TestActor.spawn_actor(mode='thread') for _ in actors_to_spawn] - # # print('Thread Actors spawned:', thread_actors) - # process_actors = [TestActor.spawn_actor(mode='process') for _ in actors_to_spawn] - # print('Process Actors spawned:', process_actors) + except BaseException as err: + print(err) + break From 721427a484aa66fd594600d43c5bb78b498759f2 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 31 Oct 2024 07:11:09 -0700 Subject: [PATCH 20/25] hide progress bar on startup --- archivebox/config/django.py | 2 +- archivebox/core/settings.py | 2 +- .../abx_spec_archivebox/states.py | 118 +++++++++++++++++- archivebox/pkgs/abx/abx.py | 2 +- 4 files changed, 117 insertions(+), 7 deletions(-) diff --git a/archivebox/config/django.py b/archivebox/config/django.py index ad3d17c1..073cd2d4 100644 --- a/archivebox/config/django.py +++ b/archivebox/config/django.py @@ -60,7 +60,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None: return with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS: - INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25) + INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=False) from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 06cfa8b2..cdcf867f 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -191,7 +191,7 @@ SQLITE_CONNECTION_OPTIONS = { # 
https://gcollazo.com/optimal-sqlite-settings-for-django/ # https://litestream.io/tips/#busy-timeout # https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options - "timeout": 5, + "timeout": 10, "check_same_thread": False, "transaction_mode": "IMMEDIATE", "init_command": ( diff --git a/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py index 15d06f61..a56649da 100644 --- a/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py +++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py @@ -20,11 +20,119 @@ from django.urls import reverse_lazy from pathlib import Path +# ORCHESTRATOR: +# An orchestrator is a single long-running daemon process that manages spawning and killing actors for different queues of objects. +# The orchestrator first starts when the archivebox starts, and it stops when archivebox is killed. +# Only one orchestrator process can be running per collection per machine. +# An orchestrator is aware of all of the ActorTypes that are defined in the system, and their associated queues. +# When started, the orchestrator runs a single runloop that continues until the archivebox process is killed. +# On each loop, the orchestrator: +# - loops through each ActorType defined in the system: +# - fetches the queue of objects pending for that ActorType by calling ActorType.get_queue() +# - checks how many actors are currently running for that ActorType by calling current_actors = ActorType.get_running_actors() +# - determines how many new actors are needed and what their launch kwargs should be to process the objects in each queue +# actors_to_spawn = ActorType.get_actors_to_spawn(queue, current_actors) +# - e.g. if there are 4 ArchiveResult objects queued all with the same persona + extractor, it should spawn a single actor to process all of them, if there are 4000 it should spawn ~5 actors +# - if there are 4 ArchiveResult objects queued with different personas + extractors, it should spawn a single actor for each persona + extractor +# - if there are *many* objects to process, it can spawn more actors of the same type up to ActorType.MAX_ACTORS to speed things up +# - spawns the new actors needed as subprocesses ActorType.spawn_actors(actors_to_spawn, block=False, double_fork=False) +# - checks for ANY objects in the DB that have a retry_at time set but where no ActorType has them in their queue, and raises a warning that they are orphaned and will never be processed +# - sleeps for 0.1s before repeating the loop, to reduce the CPU load +# The orchestrator does not manage killing actors, actors are expected to exit on their own when idle. +# ABX defines the following hookspecs for plugins to hook into the orchestrator lifecycle: +# - abx.pm.hook.on_orchestrator_startup(all_actor_types) +# - abx.pm.hook.on_orchestrator_tick_started(all_actor_types, all_queues, all_running_actors) +# - abx.pm.hook.on_orchestrator_idle(all_actor_types) # only run when there are no queues with pending objects to process +# - abx.pm.hook.on_orchestrator_shutdown(all_actor_types) +# OBJECT: +# e.g. Snapshot, Crawl, ArchiveResult +# An object is a single row in a database table, defined by a django model. +# An object has a finite set of states that it can be in. +# An object has a status field that holds the object's current state e.g. status="queued". +# An object has a retry_at field that holds a timestamp for when it should next be checked by an actor eventloop. 
+# Each type of object has a single tick() method defined that handles all of its state transitions. +# When an object's retry_at time has passed, the actor managing that type of object will spawn an actor and call tick(object) to move it to its next state. +# ABX defines the following hookspecs for plugins to hook into object lifecycle: # use these for in-memory operations, don't use these for db on_create/on_update/on_delete logic, separate hooks are available on write operations below +# - abx.pm.hook.on__init(object) # when object is initialized in-memory, don't put any slow code here as it runs on every object returned from DB queries! only for setting default values, ._cache_attrs, etc. +# - abx.pm.hook.on__clean(object) # when object's form fields are validated but before it is to be saved to the DB, put any checks/validations on field values here +# - abx.pm.hook.on__save(object) # when object is being saved to the DB, put any code here that should run right before super().save() +# ACTORS: +# An actor is a long-running daemon process that runs a loop to process a single object at a time from a queue it defines (e.g. ActorType.queue=Snapshot.objects.filter(status='queued', retry_at__lte=time.now())). +# An actor at runtime is an instance of an ActorType class + some launch kwargs that it's passed at startup (e.g. persona, extractor, etc.). +# Actors are started lazily by the orchestrator only when their ActorType.queue indicates there are pending objects to process. +# ActorTypes should define ActorType.get_queue(), ActorType.get_actors_to_spawn(), ActorType.get_running_actors(), and ActorType.spawn_actors() methods exposed to the orchestrator. +# On startup, an actor can initialize shared resources it needs to perform its work, and keep a reference in memory to them. (e.g. launch chrome in the background, setup an API client, etc.) +# On each loop, the actor gets a single object to process from the top of the queue, and runs ActorType.tick(object). +# The actor should have a hardcoded ActorType.MAX_TICK_TIME, and should enforce it by killing the tick() method if it runs too long. +# Before calling tick(), an actor should bump the object.retry_at time by MAX_TICK_TIME to prevent other actors from picking it up while the current actor is still processing it. +# The actor blocks waiting for tick(obj) to finish executing, then the loop repeats and it gets the next object to call tick(object) on. +# If a tick(obj) method raises an exception, the actor should catch it and log it, then move on to the next object in the queue. +# If there are no objects left in the queue, the actor should exit. +# On exit, an actor should release any shared resources it initialized on startup and clean up after itself. +# On startup an actor should fire abx.pm.hook.on_actor_startup(object) and on exit it should fire abx.pm.hook.on_actor_exit(object) (both synchronous hooks that can be used by plugins to register any startup/cleanup code). +# An ActorType defines the following hookspecs for plugins to hook into its behavior: +# - abx.pm.hook.on_actor_startup(actor, queue) +# - abx.pm.hook.on_actor_tick_started(actor, object) +# - abx.pm.hook.on_actor_tick_finished(actor, object) +# - abx.pm.hook.on_actor_tick_exception(actor, object, exception) +# - abx.pm.hook.on_actor_shutdown(actor) +# TICK: +# A tick() method is a method defined on an ActorType, passed a single object to process and perform a single state transition on. 
+# A tick() method does NOT need to lock the object it's operating on, the actor will bump the object's retry_at += MAX_TICK_TIME before handing it off to tick(). +# A tick() method does NOT open a DB transaction for its entire duration of execution, instead it should do all its writes in one atomic operation using a compare-and-swap .select(status=previous_state).update(status=next_state) (optimistic concurrency control). +# A tick() method does NOT return any values, it either succeeds and returns None, or fails and raises an exception to be handled by the actor runloop. +# A tick() method does NOT need to enforce its own MAX_TICK_TIME / any timeouts, the actor runloop code should enforce that. +# A tick() should NOT call other tick() methods directly, and it should not spawn orchestrator or actor processes. +# A tick() should set its object.retry_at time to a value farther in the future and return early if it wants to skip execution due to hitting a ratelimit or transient error. +# A tick() can: +# - read from any other objects, filesystem, or external APIs (e.g. check if snapshot_dir/screenshot.png exists) +# - perform any checks necessary and branch to determine which transition it should perform to which next state +# - execute a single transition_from_abx_to_xyz(object) method to perform the transition to the next state it decided on +# TRANSITION: +# A transition_from_abx_to_xyz(object) method is a function defined on an ActorType, passed a single object by a tick() method to perform a defined transition on. +# A transition_from_abx_to_xyz() method does NOT need to lock the object it's operating on or open any db transactions. +# A transition should not have any branching logic, it should only execute the given transition that it defines + any side effects. +# A transition should be idempotent, if two transitions run at once on the same object it should only perform one transition and the other should fail +# A transition should be atomic, if it is interrupted it should leave the object in a consistent state +# A transition's main body should: +# - perform a SINGLE write() to the underlying object using a compare_and_swap .filter(status=last_state).update(status=next_state) to move it to its next state +# - update the object's retry_at time to a new value, or set it to None if it's in a final state & should not be checked again +# A transition can also trigger side effects at the end of its execution: +# - update the retry_at time on *other* objects (so that they are rechecked by their own actor on the next tick) (ONLY retry_at, do not update any other fields) +# - filesystem operations (e.g. moving a directory to a new location) +# - external API calls (e.g. uploading to s3, firing a webhook, writing to a logfile, etc.) +# - DO NOT use side effects to directly mutate other objects' state or trigger other state transitions +# ABX defines the following hookspecs for plugins to hook into transition behavior: +# - abx.pm.hook.on_transition__from_abx_to_xyz_started(object) +# - abx.pm.hook.on_transition__from_abx_to_xyz_succeeded(object) +# READ: +# A read() method is a function defined for a given ActorType that performs a single read from the DB and/or other read models like django cache, filesystem, in-memory caches, etc. +# A read() method should accept either an instance/pk/uuid/abid or some filter_kwargs, and return a benedict/TypedDict or pydantic model containing bare values as the result. 
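+# For example, a minimal illustrative sketch of a read() helper (read_snapshot() and its exact field list are hypothetical, only Snapshot and benedict come from the codebase):
+#   def read_snapshot(snapshot_id) -> benedict:
+#       fields = Snapshot.objects.values('id', 'url', 'status', 'retry_at').get(id=snapshot_id)
+#       return benedict(fields)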
+ +# WRITE: +# A write() method is a function defined for a given ActorType that performs a single atomic db write to update the DB, django cache, filesystem, in-memory caches, etc. for that object. +# A write() method does NOT need to lock the object it's operating on or open any db transactions, it should just perform a single compare-and-swap .select(status=last_state).update(status=next_state) operation. +# A write() method does NOT need to enforce any timeouts or ratelimits, the tick() method should do that. +# A write() method should NOT have any branching logic or side effects like spawning other processes. +# ABX defines the following hookspecs for plugins to hook into write behavior: +# - abx.pm.hook.on__created(object) +# - abx.pm.hook.on__updated(object) +# - abx.pm.hook.on__deleted(object) + +# SIDEEFFECT: +# A sideeffect is a helper function defined in an app to be used by one or more tick() methods to perform a side effect that isn't a simple DB write or read. +# A sideeffect can spawn other processes, make 3rd-party API calls, write to the filesystem, etc. e.g. subprocess.Popen('wget https://example.com') +# A sideeffect should execute quickly and return early, it should try not to block for slow RPCs, subprocess jobs, or network operations. +# For slow or long-running sideeffects, spawn a separate background process and return immediately. Update the object's retry_at time and state as-needed so that a future tick() will check for any expected output from the background job. +# ABX defines the following hookspecs for plugins to hook into sideeffect behavior: +# - abx.pm.hook.on_sideeffect_xyz_started(object) +# - abx.pm.hook.on_sideeffect_xyz_succeeded(object) +# - abx.pm.hook.on_sideeffect_xyz_failed(object) @@ -99,6 +207,7 @@ def transition_snapshot_to_started(snapshot, config, cwd): fields_to_update = {'status': 'started', 'retry_at': retry_at, 'retries': retries, 'start_ts': time.now(), 'end_ts': None} snapshot = abx.archivebox.writes.update_snapshot(filter_kwargs=snapshot_to_update, update_kwargs=fields_to_update) + # trigger side effects on state transition (these just emit an event to a separate queue that's then processed by a huey worker) cleanup_snapshot_dir(snapshot, config, cwd) create_snapshot_pending_archiveresults(snapshot, config, cwd) update_snapshot_index_json(archiveresult, config, cwd) @@ -114,6 +223,7 @@ def transition_snapshot_to_sealed(snapshot, config, cwd): fields_to_update = {'status': 'sealed', 'retry_at': None, 'end_ts': time.now()} snapshot = abx.archivebox.writes.update_snapshot(filter_kwargs=snapshot_to_update, update_kwargs=fields_to_update) + # side effects: cleanup_snapshot_dir(snapshot, config, cwd) update_snapshot_index_json(snapshot, config, cwd) update_snapshot_index_html(snapshot, config, cwd) @@ -225,7 +335,7 @@ def transition_archiveresult_to_started(archiveresult, config, cwd): fields_to_update = {'status': 'started', 'retry_at': retry_at, 'retries': retries, 'start_ts': time.now(), 'output': None, 'error': None} archiveresult = abx.archivebox.writes.update_archiveresult(filter=archiveresult_to_update, update=fields_to_update) - + # side effects: with TimedProgress(): try: 
abx.archivebox.events.on_crawl_schedule_tick(scheduled_crawl) except Exception as e: - abx.archivebox.events.on_crawl_schedule_failure(timezone.now(), machine=Machine.objects.get_current_machine(), error=e, schedule=scheduled_crawl) + abx.archivebox.events.on_crawl_schedule_tick_failure(timezone.now(), machine=Machine.objects.get_current_machine(), error=e, schedule=scheduled_crawl) # abx.archivebox.events.on_scheduler_tick_end(timezone.now(), machine=Machine.objects.get_current_machine(), tasks=scheduled_tasks_due) time.sleep(1) @@ -420,7 +530,7 @@ def create_root_snapshot(crawl): abx.archivebox.writes.update_crawl_stats(started_at=timezone.now()) -def create_archiveresults_pending_from_snapshot(snapshot, config): +def create_snapshot_pending_archiveresults(snapshot, config): config = get_scope_config( # defaults=settings.CONFIG_FROM_DEFAULTS, # configfile=settings.CONFIG_FROM_FILE, diff --git a/archivebox/pkgs/abx/abx.py b/archivebox/pkgs/abx/abx.py index 4b08e743..de4f0046 100644 --- a/archivebox/pkgs/abx/abx.py +++ b/archivebox/pkgs/abx/abx.py @@ -262,7 +262,7 @@ def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo: # import the plugin module by its name if isinstance(plugin, str): module = importlib.import_module(plugin) - print('IMPORTED PLUGIN:', plugin) + # print('IMPORTED PLUGIN:', plugin) plugin = getattr(module, 'PLUGIN_SPEC', getattr(module, 'PLUGIN', module)) elif inspect.ismodule(plugin): module = plugin From dbe5c0bc07486c4f216d9b4b247d921070f8e2e2 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 2 Nov 2024 17:25:51 -0700 Subject: [PATCH 21/25] more orchestrator and actor improvements --- archivebox/actors/actor.py | 63 +++++++++---- archivebox/actors/orchestrator.py | 144 +++++++++++++++++++----------- 2 files changed, 137 insertions(+), 70 deletions(-) diff --git a/archivebox/actors/actor.py b/archivebox/actors/actor.py index 97316405..98fdd4cb 100644 --- a/archivebox/actors/actor.py +++ b/archivebox/actors/actor.py @@ -2,10 +2,11 @@ __package__ = 'archivebox.actors' import os import time -import psutil from typing import ClassVar, Generic, TypeVar, Any, cast, Literal, Type +from django.utils.functional import classproperty from rich import print +import psutil from django import db from django.db import models @@ -37,11 +38,15 @@ class ActorType(Generic[ModelType]): def __repr__(self) -> str: label = 'pid' if self.mode == 'process' else 'tid' - return f'[underline]{self.__class__.__name__}[/underline]\\[{label}={self.pid}]' + return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]' def __str__(self) -> str: return self.__repr__() + @classproperty + def name(cls) -> str: + return cls.__name__ # type: ignore + @classmethod def get_running_actors(cls) -> list[int]: """returns a list of pids of all running actors of this type""" @@ -89,7 +94,35 @@ class ActorType(Generic[ModelType]): return cls.get_queue().last() @classmethod - def get_next_atomic(cls, model: Type, filter=('status', 'queued'), update=('status', 'started'), sort='created_at', order='DESC', choose_from_top=50) -> ModelType | None: + def get_random(cls, model: Type[ModelType], where='status = "queued"', set='status = "started"', choose_from_top=50) -> ModelType | None: + app_label = model._meta.app_label + model_name = model._meta.model_name + + with db.connection.cursor() as cursor: + # subquery gets the pool of the top 50 candidates sorted by sort and order + # main query selects a random one from that pool + cursor.execute(f""" + UPDATE {app_label}_{model_name} + SET {set} + 
WHERE {where} and id = ( + SELECT id FROM {app_label}_{model_name} + WHERE {where} + LIMIT 1 + OFFSET ABS(RANDOM()) % {choose_from_top} + ) + RETURNING id; + """) + result = cursor.fetchone() + + # If no rows were claimed, return None + if result is None: + return None + + return model.objects.get(id=result[0]) + + + @classmethod + def get_next_atomic(cls, model: Type[ModelType], where='status = "queued"', set='status = "started"', order_by='created_at DESC', choose_from_top=50) -> ModelType | None: """ atomically claim a random object from the top n=50 objects in the queue by updating status=queued->started optimized for minimizing contention on the queue with other actors selecting from the same list @@ -102,18 +135,18 @@ class ActorType(Generic[ModelType]): # main query selects a random one from that pool cursor.execute(f""" UPDATE {app_label}_{model_name} - SET {update[0]} = '{update[1]}' - WHERE {filter[0]} = '{filter[1]}' and id = ( + SET {set} + WHERE {where} and id = ( SELECT id FROM ( SELECT id FROM {app_label}_{model_name} - WHERE {filter[0]} = '{filter[1]}' - ORDER BY {sort} {order} + WHERE {where} + ORDER BY {order_by} LIMIT {choose_from_top} ) candidates ORDER BY RANDOM() LIMIT 1 ) - RETURNING *; + RETURNING id; """) result = cursor.fetchone() @@ -121,9 +154,7 @@ class ActorType(Generic[ModelType]): if result is None: return None - # reconstruct model instance from the row tuple - columns = [col[0] for col in cursor.description] - return model(**dict(zip(columns, result))) + return model.objects.get(id=result[0]) @classmethod def get_actors_to_spawn(cls, queue, running_actors) -> list[LaunchKwargs]: @@ -159,19 +190,19 @@ class ActorType(Generic[ModelType]): # abx.pm.hook.on_actor_shutdown(self) def on_tick_start(self, obj: ModelType): - # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', getattr(obj, 'abid', obj.id)) + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', obj.abid or obj.id) # abx.pm.hook.on_actor_tick_start(self, obj_to_process) # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') pass def on_tick_end(self, obj: ModelType): - # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', getattr(obj, 'abid', obj.id)) + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', obj.abid or obj.id) # abx.pm.hook.on_actor_tick_end(self, obj_to_process) # self.timer.end() pass def on_tick_exception(self, obj: ModelType, err: BaseException): - print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', getattr(obj, 'abid', obj.id), err) + print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) def runloop(self): @@ -220,10 +251,10 @@ class ActorType(Generic[ModelType]): self.on_shutdown(err=err) def tick(self, obj: ModelType) -> None: - print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', getattr(obj, 'abid', obj.id)) + print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', obj.abid or obj.id) def lock(self, obj: ModelType) -> bool: - print(f'[blue]πŸƒβ€β™‚οΈ {self}.lock()[/blue]', getattr(obj, 'abid', obj.id)) + print(f'[blue]πŸƒβ€β™‚οΈ {self}.lock()[/blue]', obj.abid or obj.id) return True diff --git a/archivebox/actors/orchestrator.py b/archivebox/actors/orchestrator.py index 1ca90148..ff33ec3e 100644 --- a/archivebox/actors/orchestrator.py +++ b/archivebox/actors/orchestrator.py @@ -4,9 +4,12 @@ import os import time import itertools import uuid -from typing import Dict, Type +from typing import Dict, Type, Literal +from django.utils.functional import classproperty from multiprocessing 
import Process, cpu_count +from threading import Thread, get_native_id + from rich import print @@ -19,21 +22,41 @@ class Orchestrator: pid: int idle_count: int = 0 actor_types: Dict[str, Type[ActorType]] + mode: Literal['thread', 'process'] = 'process' - def __init__(self, actor_types: Dict[str, Type[ActorType]] | None = None): + def __init__(self, actor_types: Dict[str, Type[ActorType]] | None = None, mode: Literal['thread', 'process'] | None=None): self.actor_types = actor_types or self.actor_types or self.autodiscover_actor_types() + self.mode = mode or self.mode def __repr__(self) -> str: - return f'[underline]{self.__class__.__name__}[/underline]\\[pid={self.pid}]' + label = 'tid' if self.mode == 'thread' else 'pid' + return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]' def __str__(self) -> str: return self.__repr__() + + @classproperty + def name(cls) -> str: + return cls.__name__ # type: ignore + + def fork_as_thread(self): + self.thread = Thread(target=self.runloop) + self.thread.start() + assert self.thread.native_id is not None + return self.thread.native_id + + def fork_as_process(self): + self.process = Process(target=self.runloop) + self.process.start() + assert self.process.pid is not None + return self.process.pid def start(self) -> int: - orchestrator_bg_proc = Process(target=self.runloop) - orchestrator_bg_proc.start() - assert orchestrator_bg_proc.pid is not None - return orchestrator_bg_proc.pid + if self.mode == 'thread': + return self.fork_as_thread() + elif self.mode == 'process': + return self.fork_as_process() + raise ValueError(f'Invalid orchestrator mode: {self.mode}') @classmethod def autodiscover_actor_types(cls) -> Dict[str, Type[ActorType]]: @@ -42,7 +65,8 @@ class Orchestrator: # return {'Snapshot': SnapshotActorType, 'ArchiveResult_chrome': ChromeActorType, ...} return { # look through all models and find all classes that inherit from ActorType - # ... 
+ # actor_type.__name__: actor_type + # for actor_type in abx.pm.hook.get_all_ACTORS_TYPES().values() } @classmethod @@ -56,8 +80,12 @@ class Orchestrator: return orphaned_objects def on_startup(self): - self.pid = os.getpid() - print(f'[green]πŸ‘¨β€βœˆοΈ {self}.on_startup() STARTUP (PROCESS)[/green]') + if self.mode == 'thread': + self.pid = get_native_id() + print(f'[green]πŸ‘¨β€βœˆοΈ {self}.on_startup() STARTUP (THREAD)[/green]') + elif self.mode == 'process': + self.pid = os.getpid() + print(f'[green]πŸ‘¨β€βœˆοΈ {self}.on_startup() STARTUP (PROCESS)[/green]') # abx.pm.hook.on_orchestrator_startup(self) def on_shutdown(self, err: BaseException | None = None): @@ -109,8 +137,10 @@ class Orchestrator: for launch_kwargs in actors_to_spawn: new_actor_pid = actor_type.start(mode='process', **launch_kwargs) all_spawned_actors.append(new_actor_pid) - except BaseException as err: + except Exception as err: print(f'πŸƒβ€β™‚οΈ ERROR: {self} Failed to get {actor_type} queue & running actors', err) + except BaseException: + raise if not any(queue.exists() for queue in all_queues.values()): self.on_idle(all_queues) @@ -152,30 +182,36 @@ class FaviconActor(ActorType[ArchiveResult]): @classmethod def get_next(cls) -> ArchiveResult | None: - return cls.get_next_atomic( + # return cls.get_next_atomic( + # model=ArchiveResult, + # where='status = "failed"', + # set='status = "started"', + # order_by='created_at DESC', + # choose_from_top=cpu_count() * 10, + # ) + return cls.get_random( model=ArchiveResult, - filter=('status', 'failed'), - update=('status', 'started'), - sort='created_at', - order='DESC', - choose_from_top=cpu_count() * 10 + where='status = "failed"', + set='status = "queued"', + choose_from_top=cls.get_queue().count(), ) def tick(self, obj: ArchiveResult): - print(f'[grey53]{self}.tick({obj.id}) remaining:[/grey53]', self.get_queue().count()) + print(f'[grey53]{self}.tick({obj.abid or obj.id}) remaining:[/grey53]', self.get_queue().count()) updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='success') == 1 if not updated: - raise Exception(f'Failed to update {obj.abid}, interrupted by another actor writing to the same object') + raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object') def lock(self, obj: ArchiveResult) -> bool: """As an alternative to self.get_next_atomic(), we can use select_for_update() or manually update a semaphore field here""" - # locked = ArchiveResult.objects.select_for_update(skip_locked=True).filter(id=obj.id, status='pending').update(status='started') == 1 - # if locked: - # print(f'FaviconActor[{self.pid}] lock({obj.id}) πŸ”’') - # else: - # print(f'FaviconActor[{self.pid}] lock({obj.id}) X') - return True + locked = ArchiveResult.objects.filter(id=obj.id, status='queued').update(status='started') == 1 + if locked: + # print(f'FaviconActor[{self.pid}] lock({obj.id}) πŸ”’') + pass + else: + print(f'FaviconActor[{self.pid}] lock({obj.id}) X') + return locked class ExtractorsOrchestrator(Orchestrator): @@ -192,32 +228,32 @@ if __name__ == '__main__': assert snap is not None created = 0 while True: - time.sleep(0.005) - try: - ArchiveResult.objects.bulk_create([ - ArchiveResult( - id=uuid.uuid4(), - snapshot=snap, - status='failed', - extractor='favicon', - cmd=['echo', '"hello"'], - cmd_version='1.0', - pwd='.', - start_ts=timezone.now(), - end_ts=timezone.now(), - created_at=timezone.now(), - modified_at=timezone.now(), - created_by_id=1, - ) - for _ in range(100) 
- ]) - created += 100 - if created % 1000 == 0: - print(f'[blue]Created {created} ArchiveResults...[/blue]') - time.sleep(25) - except Exception as err: - print(err) - db.connections.close_all() - except BaseException as err: - print(err) - break + time.sleep(0.05) + # try: + # ArchiveResult.objects.bulk_create([ + # ArchiveResult( + # id=uuid.uuid4(), + # snapshot=snap, + # status='failed', + # extractor='favicon', + # cmd=['echo', '"hello"'], + # cmd_version='1.0', + # pwd='.', + # start_ts=timezone.now(), + # end_ts=timezone.now(), + # created_at=timezone.now(), + # modified_at=timezone.now(), + # created_by_id=1, + # ) + # for _ in range(100) + # ]) + # created += 100 + # if created % 1000 == 0: + # print(f'[blue]Created {created} ArchiveResults...[/blue]') + # time.sleep(25) + # except Exception as err: + # print(err) + # db.connections.close_all() + # except BaseException as err: + # print(err) + # break From 2337f874ad9988846a8f8cf9ce869135711b2c87 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 2 Nov 2024 19:54:25 -0700 Subject: [PATCH 22/25] better actor atomic claim --- archivebox/actors/actor.py | 364 +++++++++++++++++------------- archivebox/actors/orchestrator.py | 10 +- 2 files changed, 215 insertions(+), 159 deletions(-) diff --git a/archivebox/actors/actor.py b/archivebox/actors/actor.py index 98fdd4cb..1d59bb8f 100644 --- a/archivebox/actors/actor.py +++ b/archivebox/actors/actor.py @@ -2,6 +2,7 @@ __package__ = 'archivebox.actors' import os import time +from abc import ABC, abstractmethod from typing import ClassVar, Generic, TypeVar, Any, cast, Literal, Type from django.utils.functional import classproperty @@ -16,148 +17,77 @@ from threading import Thread, get_native_id # from archivebox.logging_util import TimedProgress -ALL_SPAWNED_ACTORS: list[psutil.Process] = [] - - LaunchKwargs = dict[str, Any] ModelType = TypeVar('ModelType', bound=models.Model) -class ActorType(Generic[ModelType]): +class ActorType(ABC, Generic[ModelType]): + """ + Base class for all actors. 
Usage: + class FaviconActor(ActorType[ArchiveResult]): + QUERYSET: ClassVar[QuerySet] = ArchiveResult.objects.filter(status='queued', extractor='favicon') + CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"' + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' + ATOMIC: ClassVar[bool] = True + + def claim_sql_set(self, obj: ArchiveResult) -> str: + # SQL fields to update atomically while claiming an object from the queue + retry_at = datetime.now() + timedelta(seconds=self.MAX_TICK_TIME) + return f"status = 'started', locked_by = {self.pid}, retry_at = {retry_at}" + + def tick(self, obj: ArchiveResult) -> None: + run_favicon_extractor(obj) + ArchiveResult.objects.filter(pk=obj.pk, status='started').update(status='success') + """ pid: int idle_count: int = 0 launch_kwargs: LaunchKwargs = {} + mode: Literal['thread', 'process'] = 'process' + + QUERYSET: ClassVar[QuerySet] # the QuerySet to claim objects from + CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue + CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue + CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue + ATOMIC: ClassVar[bool] = True # whether to atomically fetch+claim the nextobject in one step, or fetch and lock it in two steps # model_type: Type[ModelType] - MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8 - MAX_TICK_TIME: ClassVar[int] = 60 + MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8, up to 60% of available cpu cores + MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object - def __init__(self, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs): - self.mode = mode + _SPAWNED_ACTOR_PIDS: ClassVar[list[psutil.Process]] = [] # record all the pids of Actors spawned by this class + + def __init__(self, mode: Literal['thread', 'process']|None=None, **launch_kwargs: LaunchKwargs): + self.mode = mode or self.mode self.launch_kwargs = launch_kwargs or dict(self.launch_kwargs) - def __repr__(self) -> str: - label = 'pid' if self.mode == 'process' else 'tid' - return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]' - - def __str__(self) -> str: - return self.__repr__() - @classproperty def name(cls) -> str: return cls.__name__ # type: ignore + def __str__(self) -> str: + return self.__repr__() + + def __repr__(self) -> str: + """FaviconActor[pid=1234]""" + label = 'pid' if self.mode == 'process' else 'tid' + return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]' + + ### Class Methods: Called by Orchestrator on ActorType class before it has been spawned + @classmethod def get_running_actors(cls) -> list[int]: """returns a list of pids of all running actors of this type""" # WARNING: only works for process actors, not thread actors + if cls.mode == 'thread': + raise NotImplementedError('get_running_actors() is not implemented for thread actors') return [ - proc.pid for proc in ALL_SPAWNED_ACTORS + proc.pid for proc in cls._SPAWNED_ACTOR_PIDS if proc.is_running() and proc.status() != 'zombie' ] @classmethod - def fork_actor_as_thread(cls, 
**launch_kwargs: LaunchKwargs) -> int: - actor = cls(mode='thread', **launch_kwargs) - bg_actor_thread = Thread(target=actor.runloop) - bg_actor_thread.start() - assert bg_actor_thread.native_id is not None - return bg_actor_thread.native_id - - @classmethod - def fork_actor_as_process(cls, **launch_kwargs: LaunchKwargs) -> int: - actor = cls(mode='process', **launch_kwargs) - bg_actor_process = Process(target=actor.runloop) - bg_actor_process.start() - assert bg_actor_process.pid is not None - ALL_SPAWNED_ACTORS.append(psutil.Process(pid=bg_actor_process.pid)) - return bg_actor_process.pid - - @classmethod - def start(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int: - if mode == 'thread': - return cls.fork_actor_as_thread(**launch_kwargs) - elif mode == 'process': - return cls.fork_actor_as_process(**launch_kwargs) - raise ValueError(f'Invalid actor mode: {mode}') - - @classmethod - def get_queue(cls) -> QuerySet: - """override this to provide your queryset as the queue""" - # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot')) - raise NotImplementedError - - @classmethod - def get_next(cls, atomic: bool=True) -> ModelType | None: - if atomic: - return cls.get_next_atomic(model=cls.get_queue().model) - return cls.get_queue().last() - - @classmethod - def get_random(cls, model: Type[ModelType], where='status = "queued"', set='status = "started"', choose_from_top=50) -> ModelType | None: - app_label = model._meta.app_label - model_name = model._meta.model_name - - with db.connection.cursor() as cursor: - # subquery gets the pool of the top 50 candidates sorted by sort and order - # main query selects a random one from that pool - cursor.execute(f""" - UPDATE {app_label}_{model_name} - SET {set} - WHERE {where} and id = ( - SELECT id FROM {app_label}_{model_name} - WHERE {where} - LIMIT 1 - OFFSET ABS(RANDOM()) % {choose_from_top} - ) - RETURNING id; - """) - result = cursor.fetchone() - - # If no rows were claimed, return None - if result is None: - return None - - return model.objects.get(id=result[0]) - - - @classmethod - def get_next_atomic(cls, model: Type[ModelType], where='status = "queued"', set='status = "started"', order_by='created_at DESC', choose_from_top=50) -> ModelType | None: - """ - atomically claim a random object from the top n=50 objects in the queue by updating status=queued->started - optimized for minimizing contention on the queue with other actors selecting from the same list - """ - app_label = model._meta.app_label - model_name = model._meta.model_name - - with db.connection.cursor() as cursor: - # subquery gets the pool of the top 50 candidates sorted by sort and order - # main query selects a random one from that pool - cursor.execute(f""" - UPDATE {app_label}_{model_name} - SET {set} - WHERE {where} and id = ( - SELECT id FROM ( - SELECT id FROM {app_label}_{model_name} - WHERE {where} - ORDER BY {order_by} - LIMIT {choose_from_top} - ) candidates - ORDER BY RANDOM() - LIMIT 1 - ) - RETURNING id; - """) - result = cursor.fetchone() - - # If no rows were claimed, return None - if result is None: - return None - - return model.objects.get(id=result[0]) - - @classmethod - def get_actors_to_spawn(cls, queue, running_actors) -> list[LaunchKwargs]: + def get_actors_to_spawn(cls, queue: QuerySet, running_actors: list[int]) -> list[LaunchKwargs]: """Get a list of launch kwargs for the number of actors to spawn based on the queue and currently running actors""" actors_to_spawn: 
list[LaunchKwargs] = [] max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors) @@ -175,69 +105,78 @@ class ActorType(Generic[ModelType]): else: # queue is short, spawn 1 actor actors_to_spawn += min(1, max_spawnable) * [{**cls.launch_kwargs}] return actors_to_spawn - - def on_startup(self): - if self.mode == 'thread': - self.pid = get_native_id() # thread id - print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (THREAD)[/green]') - else: - self.pid = os.getpid() # process id - print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (PROCESS)[/green]') - # abx.pm.hook.on_actor_startup(self) - def on_shutdown(self, err: BaseException | None=None): - print(f'[grey53]πŸƒβ€β™‚οΈ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') - # abx.pm.hook.on_actor_shutdown(self) + @classmethod + def start(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int: + if mode == 'thread': + return cls.fork_actor_as_thread(**launch_kwargs) + elif mode == 'process': + return cls.fork_actor_as_process(**launch_kwargs) + raise ValueError(f'Invalid actor mode: {mode} must be "thread" or "process"') - def on_tick_start(self, obj: ModelType): - # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', obj.abid or obj.id) - # abx.pm.hook.on_actor_tick_start(self, obj_to_process) - # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') - pass + @classmethod + def fork_actor_as_thread(cls, **launch_kwargs: LaunchKwargs) -> int: + """Spawn a new background thread running the actor's runloop""" + actor = cls(mode='thread', **launch_kwargs) + bg_actor_thread = Thread(target=actor.runloop) + bg_actor_thread.start() + assert bg_actor_thread.native_id is not None + return bg_actor_thread.native_id - def on_tick_end(self, obj: ModelType): - # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', obj.abid or obj.id) - # abx.pm.hook.on_actor_tick_end(self, obj_to_process) - # self.timer.end() - pass + @classmethod + def fork_actor_as_process(cls, **launch_kwargs: LaunchKwargs) -> int: + """Spawn a new background process running the actor's runloop""" + actor = cls(mode='process', **launch_kwargs) + bg_actor_process = Process(target=actor.runloop) + bg_actor_process.start() + assert bg_actor_process.pid is not None + cls._SPAWNED_ACTOR_PIDS.append(psutil.Process(pid=bg_actor_process.pid)) + return bg_actor_process.pid - def on_tick_exception(self, obj: ModelType, err: BaseException): - print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) - # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) + @classmethod + def get_model(cls) -> Type[ModelType]: + # wish this was a @classproperty but Generic[ModelType] return type cant be statically inferred for @classproperty + return cls.QUERYSET.model + + @classmethod + def get_queue(cls) -> QuerySet: + """override this to provide your queryset as the queue""" + # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot')) + return cls.QUERYSET + + + ### Instance Methods: Called by Actor after it has been spawned (i.e. 
forked as a thread or process) def runloop(self): + """The main runloop that starts running when the actor is spawned (as subprocess or thread) and exits when the queue is empty""" self.on_startup() try: while True: obj_to_process: ModelType | None = None try: - obj_to_process = cast(ModelType, self.get_next()) + obj_to_process = cast(ModelType, self.get_next(atomic=self.atomic)) except Exception: pass if obj_to_process: - self.idle_count = 0 + self.idle_count = 0 # reset idle count if we got an object else: if self.idle_count >= 30: - break # stop looping and exit if queue is empty and we have rechecked it 30 times + break # stop looping and exit if queue is empty and we have idled for 30sec else: # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...') self.idle_count += 1 time.sleep(1) continue - if not self.lock(obj_to_process): - # we are unable to lock the object, some other actor got it first. skip it and get the next object - continue - self.on_tick_start(obj_to_process) + # Process the object try: - # run the tick function on the object self.tick(obj_to_process) except Exception as err: print(f'[red]πŸƒβ€β™‚οΈ ERROR: {self}.tick()[/red]', err) - db.connections.close_all() + db.connections.close_all() # always reset the db connection after an exception to clear any pending transactions self.on_tick_exception(obj_to_process, err) finally: self.on_tick_end(obj_to_process) @@ -249,12 +188,125 @@ class ActorType(Generic[ModelType]): else: print(f'\n[red]πŸƒβ€β™‚οΈ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err) self.on_shutdown(err=err) + + def get_next(self, atomic: bool | None=None) -> ModelType | None: + """get the next object from the queue, atomically locking it if self.atomic=True""" + if atomic is None: + atomic = self.ATOMIC - def tick(self, obj: ModelType) -> None: - print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', obj.abid or obj.id) - - def lock(self, obj: ModelType) -> bool: - print(f'[blue]πŸƒβ€β™‚οΈ {self}.lock()[/blue]', obj.abid or obj.id) + if atomic: + # fetch and claim the next object from the queue in one go atomically + obj = self.get_next_atomic() + else: + # two-step claim: fetch the next object and lock it in a separate query + obj = self.get_queue().last() + assert obj and self.lock_next(obj), f'Unable to fetch+lock the next {self.get_model().__name__} object from {self}.QUEUE' + return obj + + def lock_next(self, obj: ModelType) -> bool: + """override this to implement a custom two-step (non-atomic) lock mechanism""" + # For example: + # assert obj._model.objects.filter(pk=obj.pk, status='queued').update(status='started', locked_by=self.pid) + # Not needed if using get_next_atomic() to claim the object atomically + # print(f'[blue]πŸƒβ€β™‚οΈ {self}.lock()[/blue]', obj.abid or obj.id) return True + + def claim_sql_where(self) -> str: + """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """ + return self.CLAIM_WHERE + + def claim_sql_set(self) -> str: + """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """ + return self.CLAIM_SET + + def claim_sql_order(self) -> str: + """override this to implement a custom ORDER BY clause for the atomic claim step e.g.
"created_at DESC" """ + return self.CLAIM_ORDER + + def claim_from_top(self) -> int: + """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue""" + return self.CLAIM_FROM_TOP + + def get_next_atomic(self, shallow: bool=True) -> ModelType | None: + """ + claim a random object from the top n=50 objects in the queue (atomically updates status=queued->started for claimed object) + optimized for minimizing contention on the queue with other actors selecting from the same list + slightly faster than claim_any_obj() which selects randomly from the entire queue but needs to know the total count + """ + Model = self.get_model() # e.g. ArchiveResult + table = f'{Model._meta.app_label}_{Model._meta.model_name}' # e.g. core_archiveresult + + where_sql = self.claim_sql_where() + set_sql = self.claim_sql_set() + order_by_sql = self.claim_sql_order() + choose_from_top = self.claim_from_top() + + with db.connection.cursor() as cursor: + # subquery gets the pool of the top 50 candidates sorted by sort and order + # main query selects a random one from that pool + cursor.execute(f""" + UPDATE {table} + SET {set_sql} + WHERE {where_sql} and id = ( + SELECT id FROM ( + SELECT id FROM {table} + WHERE {where_sql} + ORDER BY {order_by_sql} + LIMIT {choose_from_top} + ) candidates + ORDER BY RANDOM() + LIMIT 1 + ) + RETURNING id; + """) + result = cursor.fetchone() + + if result is None: + return None # If no rows were claimed, return None + if shallow: + # shallow: faster, returns potentially incomplete object instance missing some django auto-populated fields: + columns = [col[0] for col in cursor.description or ['id']] + return Model(**dict(zip(columns, result))) + # if not shallow do one extra query to get a more complete object instance (load it fully from scratch) + return Model.objects.get(id=result[0]) + + @abstractmethod + def tick(self, obj: ModelType) -> None: + """override this to process the object""" + print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', obj.abid or obj.id) + # For example: + # do_some_task(obj) + # do_something_else(obj) + # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success') + raise NotImplementedError('tick() must be implemented by the Actor subclass') + + def on_startup(self) -> None: + if self.mode == 'thread': + self.pid = get_native_id() # thread id + print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (THREAD)[/green]') + else: + self.pid = os.getpid() # process id + print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (PROCESS)[/green]') + # abx.pm.hook.on_actor_startup(self) + + def on_shutdown(self, err: BaseException | None=None) -> None: + print(f'[grey53]πŸƒβ€β™‚οΈ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') + # abx.pm.hook.on_actor_shutdown(self) + + def on_tick_start(self, obj: ModelType) -> None: + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_start(self, obj_to_process) + # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + pass + + def on_tick_end(self, obj: ModelType) -> None: + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + # self.timer.end() + pass + + def on_tick_exception(self, obj: ModelType, err: BaseException) -> None: + print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) + # abx.pm.hook.on_actor_tick_exception(self, 
obj_to_process, err) diff --git a/archivebox/actors/orchestrator.py b/archivebox/actors/orchestrator.py index ff33ec3e..c7fed888 100644 --- a/archivebox/actors/orchestrator.py +++ b/archivebox/actors/orchestrator.py @@ -191,22 +191,26 @@ class FaviconActor(ActorType[ArchiveResult]): # ) return cls.get_random( model=ArchiveResult, - where='status = "failed"', + where='status = "failed" AND extractor = "favicon"', set='status = "queued"', - choose_from_top=cls.get_queue().count(), + choose_from_top=50, ) def tick(self, obj: ArchiveResult): - print(f'[grey53]{self}.tick({obj.abid or obj.id}) remaining:[/grey53]', self.get_queue().count()) + print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count()) updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='success') == 1 if not updated: raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object') + # obj.refresh_from_db() + obj.status = 'success' def lock(self, obj: ArchiveResult) -> bool: """As an alternative to self.get_next_atomic(), we can use select_for_update() or manually update a semaphore field here""" locked = ArchiveResult.objects.filter(id=obj.id, status='queued').update(status='started') == 1 if locked: + # obj.refresh_from_db() + obj.status = 'started' # print(f'FaviconActor[{self.pid}] lock({obj.id}) πŸ”’') pass else: From 41efd010f0f6567f064f8a748b775dd0caf89f99 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 2 Nov 2024 19:54:37 -0700 Subject: [PATCH 23/25] add wip crawl actor spec --- archivebox/actors/actor_crawl.py | 286 +++++++++++++++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 archivebox/actors/actor_crawl.py diff --git a/archivebox/actors/actor_crawl.py b/archivebox/actors/actor_crawl.py new file mode 100644 index 00000000..11d85042 --- /dev/null +++ b/archivebox/actors/actor_crawl.py @@ -0,0 +1,286 @@ +__package__ = 'archivebox.actors' + +import os +import time +from typing import ClassVar, Generic, cast, Literal, Type +from django.utils.functional import classproperty + +from rich import print +import psutil + +from django import db +from django.db.models import QuerySet +from multiprocessing import Process, cpu_count +from threading import Thread, get_native_id + +from crawls.models import Crawl + +from .actor import ActorType, LaunchKwargs + +class CrawlActor(ActorType[Crawl]): + + QUERYSET: ClassVar[QuerySet] = Crawl.objects.filter(status='queued') + CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue + CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue + CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue + + # model_type: Type[ModelType] + MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8, up to 60% of available cpu cores + MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object + + _SPAWNED_ACTOR_PIDS: ClassVar[list[psutil.Process]] = [] # record all the pids of Actors spawned by this class + + def __init__(self, mode: Literal['thread', 'process']|None=None, 
**launch_kwargs: LaunchKwargs): + self.mode = mode or self.mode + self.launch_kwargs = launch_kwargs or dict(self.launch_kwargs) + + @classproperty + def name(cls) -> str: + return cls.__name__ # type: ignore + + def __str__(self) -> str: + return self.__repr__() + + def __repr__(self) -> str: + """FaviconActor[pid=1234]""" + label = 'pid' if self.mode == 'process' else 'tid' + return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]' + + ### Class Methods: Called by Orchestrator on ActorType class before it has been spawned + + @classmethod + def get_running_actors(cls) -> list[int]: + """returns a list of pids of all running actors of this type""" + # WARNING: only works for process actors, not thread actors + if cls.mode == 'thread': + raise NotImplementedError('get_running_actors() is not implemented for thread actors') + return [ + proc.pid for proc in cls._SPAWNED_ACTOR_PIDS + if proc.is_running() and proc.status() != 'zombie' + ] + + @classmethod + def get_actors_to_spawn(cls, queue: QuerySet, running_actors: list[int]) -> list[LaunchKwargs]: + """Get a list of launch kwargs for the number of actors to spawn based on the queue and currently running actors""" + actors_to_spawn: list[LaunchKwargs] = [] + max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors) + queue_length = queue.count() + + # spawning new actors is expensive, avoid spawning all the actors at once. To stagger them, + # let the next orchestrator tick handle starting another 2 on the next tick() + # if queue_length > 10: # queue is long, spawn as many as possible + # actors_to_spawn += max_spawnable * [{}] + + if not queue_length: # queue is empty, spawn 0 actors + return actors_to_spawn + elif queue_length > 4: # queue is medium, spawn 1 or 2 actors + actors_to_spawn += min(2, max_spawnable) * [{**cls.launch_kwargs}] + else: # queue is short, spawn 1 actor + actors_to_spawn += min(1, max_spawnable) * [{**cls.launch_kwargs}] + return actors_to_spawn + + @classmethod + def start(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int: + if mode == 'thread': + return cls.fork_actor_as_thread(**launch_kwargs) + elif mode == 'process': + return cls.fork_actor_as_process(**launch_kwargs) + raise ValueError(f'Invalid actor mode: {mode} must be "thread" or "process"') + + @classmethod + def fork_actor_as_thread(cls, **launch_kwargs: LaunchKwargs) -> int: + """Spawn a new background thread running the actor's runloop""" + actor = cls(mode='thread', **launch_kwargs) + bg_actor_thread = Thread(target=actor.runloop) + bg_actor_thread.start() + assert bg_actor_thread.native_id is not None + return bg_actor_thread.native_id + + @classmethod + def fork_actor_as_process(cls, **launch_kwargs: LaunchKwargs) -> int: + """Spawn a new background process running the actor's runloop""" + actor = cls(mode='process', **launch_kwargs) + bg_actor_process = Process(target=actor.runloop) + bg_actor_process.start() + assert bg_actor_process.pid is not None + cls._SPAWNED_ACTOR_PIDS.append(psutil.Process(pid=bg_actor_process.pid)) + return bg_actor_process.pid + + @classmethod + def get_model(cls) -> Type[ModelType]: + # wish this was a @classproperty but Generic[ModelType] return type cant be statically inferred for @classproperty + return cls.QUERYSET.model + + @classmethod + def get_queue(cls) -> QuerySet: + """override this to provide your queryset as the queue""" + # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot')) + return cls.QUERYSET + + + ### 
Instance Methods: Called by Actor after it has been spawned (i.e. forked as a thread or process) + + def runloop(self): + """The main runloop that starts running when the actor is spawned (as subprocess or thread) and exits when the queue is empty""" + self.on_startup() + try: + while True: + obj_to_process: ModelType | None = None + try: + obj_to_process = cast(ModelType, self.get_next(atomic=self.atomic)) + except Exception: + pass + + if obj_to_process: + self.idle_count = 0 # reset idle count if we got an object + else: + if self.idle_count >= 30: + break # stop looping and exit if queue is empty and we have idled for 30sec + else: + # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...') + self.idle_count += 1 + time.sleep(1) + continue + + self.on_tick_start(obj_to_process) + + # Process the object + try: + self.tick(obj_to_process) + except Exception as err: + print(f'[red]πŸƒβ€β™‚οΈ ERROR: {self}.tick()[/red]', err) + db.connections.close_all() # always reset the db connection after an exception to clear any pending transactions + self.on_tick_exception(obj_to_process, err) + finally: + self.on_tick_end(obj_to_process) + + self.on_shutdown(err=None) + except BaseException as err: + if isinstance(err, KeyboardInterrupt): + print() + else: + print(f'\n[red]πŸƒβ€β™‚οΈ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err) + self.on_shutdown(err=err) + + def get_next(self, atomic: bool | None=None) -> ModelType | None: + """get the next object from the queue, atomically locking it if self.atomic=True""" + if atomic is None: + atomic = self.ATOMIC + + if atomic: + # fetch and claim the next object from in the queue in one go atomically + obj = self.get_next_atomic() + else: + # two-step claim: fetch the next object and lock it in a separate query + obj = self.get_queue().last() + assert obj and self.lock_next(obj), f'Unable to fetch+lock the next {self.get_model().__name__} ojbect from {self}.QUEUE' + return obj + + def lock_next(self, obj: ModelType) -> bool: + """override this to implement a custom two-step (non-atomic)lock mechanism""" + # For example: + # assert obj._model.objects.filter(pk=obj.pk, status='queued').update(status='started', locked_by=self.pid) + # Not needed if using get_next_and_lock() to claim the object atomically + # print(f'[blue]πŸƒβ€β™‚οΈ {self}.lock()[/blue]', obj.abid or obj.id) + return True + + def claim_sql_where(self) -> str: + """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """ + return self.CLAIM_WHERE + + def claim_sql_set(self) -> str: + """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """ + return self.CLAIM_SET + + def claim_sql_order(self) -> str: + """override this to implement a custom ORDER BY clause for the atomic claim step e.g. 
"created_at DESC" """ + return self.CLAIM_ORDER + + def claim_from_top(self) -> int: + """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue""" + return self.CLAIM_FROM_TOP + + def get_next_atomic(self, shallow: bool=True) -> ModelType | None: + """ + claim a random object from the top n=50 objects in the queue (atomically updates status=queued->started for claimed object) + optimized for minimizing contention on the queue with other actors selecting from the same list + slightly faster than claim_any_obj() which selects randomly from the entire queue but needs to know the total count + """ + Model = self.get_model() # e.g. ArchiveResult + table = f'{Model._meta.app_label}_{Model._meta.model_name}' # e.g. core_archiveresult + + where_sql = self.claim_sql_where() + set_sql = self.claim_sql_set() + order_by_sql = self.claim_sql_order() + choose_from_top = self.claim_from_top() + + with db.connection.cursor() as cursor: + # subquery gets the pool of the top 50 candidates sorted by sort and order + # main query selects a random one from that pool + cursor.execute(f""" + UPDATE {table} + SET {set_sql} + WHERE {where_sql} and id = ( + SELECT id FROM ( + SELECT id FROM {table} + WHERE {where_sql} + ORDER BY {order_by_sql} + LIMIT {choose_from_top} + ) candidates + ORDER BY RANDOM() + LIMIT 1 + ) + RETURNING id; + """) + result = cursor.fetchone() + + if result is None: + return None # If no rows were claimed, return None + + if shallow: + # shallow: faster, returns potentially incomplete object instance missing some django auto-populated fields: + columns = [col[0] for col in cursor.description or ['id']] + return Model(**dict(zip(columns, result))) + + # if not shallow do one extra query to get a more complete object instance (load it fully from scratch) + return Model.objects.get(id=result[0]) + + @abstractmethod + def tick(self, obj: ModelType) -> None: + """override this to process the object""" + print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', obj.abid or obj.id) + # For example: + # do_some_task(obj) + # do_something_else(obj) + # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success') + raise NotImplementedError('tick() must be implemented by the Actor subclass') + + def on_startup(self) -> None: + if self.mode == 'thread': + self.pid = get_native_id() # thread id + print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (THREAD)[/green]') + else: + self.pid = os.getpid() # process id + print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (PROCESS)[/green]') + # abx.pm.hook.on_actor_startup(self) + + def on_shutdown(self, err: BaseException | None=None) -> None: + print(f'[grey53]πŸƒβ€β™‚οΈ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') + # abx.pm.hook.on_actor_shutdown(self) + + def on_tick_start(self, obj: ModelType) -> None: + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_start(self, obj_to_process) + # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + pass + + def on_tick_end(self, obj: ModelType) -> None: + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + # self.timer.end() + pass + + def on_tick_exception(self, obj: ModelType, err: BaseException) -> None: + print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) + # abx.pm.hook.on_actor_tick_exception(self, 
obj_to_process, err) From 48f8416762483d76f66be063bbed971249342ab4 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 3 Nov 2024 00:41:11 -0700 Subject: [PATCH 24/25] add new core and crawls statemachine manager --- archivebox/actors/actor.py | 17 +- archivebox/actors/actor_crawl.py | 286 ------------------ archivebox/actors/orchestrator.py | 53 ++-- archivebox/actors/statemachine.py | 286 ++++++++++++++++++ archivebox/core/actors.py | 73 +++++ archivebox/core/models.py | 71 ++++- archivebox/core/settings.py | 3 +- archivebox/core/statemachines.py | 115 +++++++ archivebox/crawls/actors.py | 69 +++++ archivebox/crawls/models.py | 53 +++- archivebox/crawls/statemachines.py | 48 +++ .../abx_plugin_singlefile/__init__.py | 8 +- .../abx_plugin_singlefile/actors.py | 27 ++ .../migrations/__init__.py | 0 .../abx_spec_archivebox/states.py | 19 +- archivebox/seeds/models.py | 24 +- pyproject.toml | 9 +- uv.lock | 11 + 18 files changed, 798 insertions(+), 374 deletions(-) delete mode 100644 archivebox/actors/actor_crawl.py create mode 100644 archivebox/actors/statemachine.py create mode 100644 archivebox/core/actors.py create mode 100644 archivebox/core/statemachines.py create mode 100644 archivebox/crawls/actors.py create mode 100644 archivebox/crawls/statemachines.py create mode 100644 archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py create mode 100644 archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py diff --git a/archivebox/actors/actor.py b/archivebox/actors/actor.py index 1d59bb8f..62369793 100644 --- a/archivebox/actors/actor.py +++ b/archivebox/actors/actor.py @@ -44,16 +44,17 @@ class ActorType(ABC, Generic[ModelType]): launch_kwargs: LaunchKwargs = {} mode: Literal['thread', 'process'] = 'process' + MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8, up to 60% of available cpu cores + MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object + QUERYSET: ClassVar[QuerySet] # the QuerySet to claim objects from CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue - CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue + CLAIM_FROM_TOP: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10 # the number of objects to consider when atomically getting the next object from the queue ATOMIC: ClassVar[bool] = True # whether to atomically fetch+claim the next object in one step, or fetch and lock it in two steps # model_type: Type[ModelType] - MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8, up to 60% of available cpu cores - MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object _SPAWNED_ACTOR_PIDS: ClassVar[list[psutil.Process]] = [] # record all the pids of Actors spawned by this class @@ -89,18 +90,19 @@ class ActorType(ABC, Generic[ModelType]): @classmethod def get_actors_to_spawn(cls, queue: QuerySet, running_actors: list[int]) -> list[LaunchKwargs]: """Get a list of launch kwargs for the number of actors to spawn based on the queue and currently
running actors""" + queue_length = queue.count() + if not queue_length: # queue is empty, spawn 0 actors + return [] + actors_to_spawn: list[LaunchKwargs] = [] max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors) - queue_length = queue.count() # spawning new actors is expensive, avoid spawning all the actors at once. To stagger them, # let the next orchestrator tick handle starting another 2 on the next tick() # if queue_length > 10: # queue is long, spawn as many as possible # actors_to_spawn += max_spawnable * [{}] - if not queue_length: # queue is empty, spawn 0 actors - return actors_to_spawn - elif queue_length > 4: # queue is medium, spawn 1 or 2 actors + if queue_length > 4: # queue is medium, spawn 1 or 2 actors actors_to_spawn += min(2, max_spawnable) * [{**cls.launch_kwargs}] else: # queue is short, spawn 1 actor actors_to_spawn += min(1, max_spawnable) * [{**cls.launch_kwargs}] @@ -144,7 +146,6 @@ class ActorType(ABC, Generic[ModelType]): # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot')) return cls.QUERYSET - ### Instance Methods: Called by Actor after it has been spawned (i.e. forked as a thread or process) def runloop(self): diff --git a/archivebox/actors/actor_crawl.py b/archivebox/actors/actor_crawl.py deleted file mode 100644 index 11d85042..00000000 --- a/archivebox/actors/actor_crawl.py +++ /dev/null @@ -1,286 +0,0 @@ -__package__ = 'archivebox.actors' - -import os -import time -from typing import ClassVar, Generic, cast, Literal, Type -from django.utils.functional import classproperty - -from rich import print -import psutil - -from django import db -from django.db.models import QuerySet -from multiprocessing import Process, cpu_count -from threading import Thread, get_native_id - -from crawls.models import Crawl - -from .actor import ActorType, LaunchKwargs - -class CrawlActor(ActorType[Crawl]): - - QUERYSET: ClassVar[QuerySet] = Crawl.objects.filter(status='queued') - CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue - CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue - CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue - CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue - - # model_type: Type[ModelType] - MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8, up to 60% of available cpu cores - MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object - - _SPAWNED_ACTOR_PIDS: ClassVar[list[psutil.Process]] = [] # record all the pids of Actors spawned by this class - - def __init__(self, mode: Literal['thread', 'process']|None=None, **launch_kwargs: LaunchKwargs): - self.mode = mode or self.mode - self.launch_kwargs = launch_kwargs or dict(self.launch_kwargs) - - @classproperty - def name(cls) -> str: - return cls.__name__ # type: ignore - - def __str__(self) -> str: - return self.__repr__() - - def __repr__(self) -> str: - """FaviconActor[pid=1234]""" - label = 'pid' if self.mode == 'process' else 'tid' - return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]' - - ### Class Methods: Called by Orchestrator on ActorType class before it has been spawned - - @classmethod - def 
get_running_actors(cls) -> list[int]: - """returns a list of pids of all running actors of this type""" - # WARNING: only works for process actors, not thread actors - if cls.mode == 'thread': - raise NotImplementedError('get_running_actors() is not implemented for thread actors') - return [ - proc.pid for proc in cls._SPAWNED_ACTOR_PIDS - if proc.is_running() and proc.status() != 'zombie' - ] - - @classmethod - def get_actors_to_spawn(cls, queue: QuerySet, running_actors: list[int]) -> list[LaunchKwargs]: - """Get a list of launch kwargs for the number of actors to spawn based on the queue and currently running actors""" - actors_to_spawn: list[LaunchKwargs] = [] - max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors) - queue_length = queue.count() - - # spawning new actors is expensive, avoid spawning all the actors at once. To stagger them, - # let the next orchestrator tick handle starting another 2 on the next tick() - # if queue_length > 10: # queue is long, spawn as many as possible - # actors_to_spawn += max_spawnable * [{}] - - if not queue_length: # queue is empty, spawn 0 actors - return actors_to_spawn - elif queue_length > 4: # queue is medium, spawn 1 or 2 actors - actors_to_spawn += min(2, max_spawnable) * [{**cls.launch_kwargs}] - else: # queue is short, spawn 1 actor - actors_to_spawn += min(1, max_spawnable) * [{**cls.launch_kwargs}] - return actors_to_spawn - - @classmethod - def start(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int: - if mode == 'thread': - return cls.fork_actor_as_thread(**launch_kwargs) - elif mode == 'process': - return cls.fork_actor_as_process(**launch_kwargs) - raise ValueError(f'Invalid actor mode: {mode} must be "thread" or "process"') - - @classmethod - def fork_actor_as_thread(cls, **launch_kwargs: LaunchKwargs) -> int: - """Spawn a new background thread running the actor's runloop""" - actor = cls(mode='thread', **launch_kwargs) - bg_actor_thread = Thread(target=actor.runloop) - bg_actor_thread.start() - assert bg_actor_thread.native_id is not None - return bg_actor_thread.native_id - - @classmethod - def fork_actor_as_process(cls, **launch_kwargs: LaunchKwargs) -> int: - """Spawn a new background process running the actor's runloop""" - actor = cls(mode='process', **launch_kwargs) - bg_actor_process = Process(target=actor.runloop) - bg_actor_process.start() - assert bg_actor_process.pid is not None - cls._SPAWNED_ACTOR_PIDS.append(psutil.Process(pid=bg_actor_process.pid)) - return bg_actor_process.pid - - @classmethod - def get_model(cls) -> Type[ModelType]: - # wish this was a @classproperty but Generic[ModelType] return type cant be statically inferred for @classproperty - return cls.QUERYSET.model - - @classmethod - def get_queue(cls) -> QuerySet: - """override this to provide your queryset as the queue""" - # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot')) - return cls.QUERYSET - - - ### Instance Methods: Called by Actor after it has been spawned (i.e. 
forked as a thread or process) - - def runloop(self): - """The main runloop that starts running when the actor is spawned (as subprocess or thread) and exits when the queue is empty""" - self.on_startup() - try: - while True: - obj_to_process: ModelType | None = None - try: - obj_to_process = cast(ModelType, self.get_next(atomic=self.atomic)) - except Exception: - pass - - if obj_to_process: - self.idle_count = 0 # reset idle count if we got an object - else: - if self.idle_count >= 30: - break # stop looping and exit if queue is empty and we have idled for 30sec - else: - # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...') - self.idle_count += 1 - time.sleep(1) - continue - - self.on_tick_start(obj_to_process) - - # Process the object - try: - self.tick(obj_to_process) - except Exception as err: - print(f'[red]πŸƒβ€β™‚οΈ ERROR: {self}.tick()[/red]', err) - db.connections.close_all() # always reset the db connection after an exception to clear any pending transactions - self.on_tick_exception(obj_to_process, err) - finally: - self.on_tick_end(obj_to_process) - - self.on_shutdown(err=None) - except BaseException as err: - if isinstance(err, KeyboardInterrupt): - print() - else: - print(f'\n[red]πŸƒβ€β™‚οΈ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err) - self.on_shutdown(err=err) - - def get_next(self, atomic: bool | None=None) -> ModelType | None: - """get the next object from the queue, atomically locking it if self.atomic=True""" - if atomic is None: - atomic = self.ATOMIC - - if atomic: - # fetch and claim the next object from in the queue in one go atomically - obj = self.get_next_atomic() - else: - # two-step claim: fetch the next object and lock it in a separate query - obj = self.get_queue().last() - assert obj and self.lock_next(obj), f'Unable to fetch+lock the next {self.get_model().__name__} ojbect from {self}.QUEUE' - return obj - - def lock_next(self, obj: ModelType) -> bool: - """override this to implement a custom two-step (non-atomic)lock mechanism""" - # For example: - # assert obj._model.objects.filter(pk=obj.pk, status='queued').update(status='started', locked_by=self.pid) - # Not needed if using get_next_and_lock() to claim the object atomically - # print(f'[blue]πŸƒβ€β™‚οΈ {self}.lock()[/blue]', obj.abid or obj.id) - return True - - def claim_sql_where(self) -> str: - """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """ - return self.CLAIM_WHERE - - def claim_sql_set(self) -> str: - """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """ - return self.CLAIM_SET - - def claim_sql_order(self) -> str: - """override this to implement a custom ORDER BY clause for the atomic claim step e.g. 
"created_at DESC" """ - return self.CLAIM_ORDER - - def claim_from_top(self) -> int: - """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue""" - return self.CLAIM_FROM_TOP - - def get_next_atomic(self, shallow: bool=True) -> ModelType | None: - """ - claim a random object from the top n=50 objects in the queue (atomically updates status=queued->started for claimed object) - optimized for minimizing contention on the queue with other actors selecting from the same list - slightly faster than claim_any_obj() which selects randomly from the entire queue but needs to know the total count - """ - Model = self.get_model() # e.g. ArchiveResult - table = f'{Model._meta.app_label}_{Model._meta.model_name}' # e.g. core_archiveresult - - where_sql = self.claim_sql_where() - set_sql = self.claim_sql_set() - order_by_sql = self.claim_sql_order() - choose_from_top = self.claim_from_top() - - with db.connection.cursor() as cursor: - # subquery gets the pool of the top 50 candidates sorted by sort and order - # main query selects a random one from that pool - cursor.execute(f""" - UPDATE {table} - SET {set_sql} - WHERE {where_sql} and id = ( - SELECT id FROM ( - SELECT id FROM {table} - WHERE {where_sql} - ORDER BY {order_by_sql} - LIMIT {choose_from_top} - ) candidates - ORDER BY RANDOM() - LIMIT 1 - ) - RETURNING id; - """) - result = cursor.fetchone() - - if result is None: - return None # If no rows were claimed, return None - - if shallow: - # shallow: faster, returns potentially incomplete object instance missing some django auto-populated fields: - columns = [col[0] for col in cursor.description or ['id']] - return Model(**dict(zip(columns, result))) - - # if not shallow do one extra query to get a more complete object instance (load it fully from scratch) - return Model.objects.get(id=result[0]) - - @abstractmethod - def tick(self, obj: ModelType) -> None: - """override this to process the object""" - print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', obj.abid or obj.id) - # For example: - # do_some_task(obj) - # do_something_else(obj) - # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success') - raise NotImplementedError('tick() must be implemented by the Actor subclass') - - def on_startup(self) -> None: - if self.mode == 'thread': - self.pid = get_native_id() # thread id - print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (THREAD)[/green]') - else: - self.pid = os.getpid() # process id - print(f'[green]πŸƒβ€β™‚οΈ {self}.on_startup() STARTUP (PROCESS)[/green]') - # abx.pm.hook.on_actor_startup(self) - - def on_shutdown(self, err: BaseException | None=None) -> None: - print(f'[grey53]πŸƒβ€β™‚οΈ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') - # abx.pm.hook.on_actor_shutdown(self) - - def on_tick_start(self, obj: ModelType) -> None: - # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', obj.abid or obj.id) - # abx.pm.hook.on_actor_tick_start(self, obj_to_process) - # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') - pass - - def on_tick_end(self, obj: ModelType) -> None: - # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', obj.abid or obj.id) - # abx.pm.hook.on_actor_tick_end(self, obj_to_process) - # self.timer.end() - pass - - def on_tick_exception(self, obj: ModelType, err: BaseException) -> None: - print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) - # abx.pm.hook.on_actor_tick_exception(self, 
obj_to_process, err) diff --git a/archivebox/actors/orchestrator.py b/archivebox/actors/orchestrator.py index c7fed888..df4c860b 100644 --- a/archivebox/actors/orchestrator.py +++ b/archivebox/actors/orchestrator.py @@ -3,8 +3,7 @@ __package__ = 'archivebox.actors' import os import time import itertools -import uuid -from typing import Dict, Type, Literal +from typing import Dict, Type, Literal, ClassVar from django.utils.functional import classproperty from multiprocessing import Process, cpu_count @@ -173,54 +172,36 @@ from django import db from django.db import connection +from crawls.actors import CrawlActor +from .actor_snapshot import SnapshotActor + +from abx_plugin_singlefile.actors import SinglefileActor class FaviconActor(ActorType[ArchiveResult]): - @classmethod - def get_queue(cls) -> QuerySet[ArchiveResult]: + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' + CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"' + CLAIM_SET: ClassVar[str] = 'status = "started"' + + @classproperty + def QUERYSET(cls) -> QuerySet: return ArchiveResult.objects.filter(status='failed', extractor='favicon') - - @classmethod - def get_next(cls) -> ArchiveResult | None: - # return cls.get_next_atomic( - # model=ArchiveResult, - # where='status = "failed"', - # set='status = "started"', - # order_by='created_at DESC', - # choose_from_top=cpu_count() * 10, - # ) - return cls.get_random( - model=ArchiveResult, - where='status = "failed" AND extractor = "favicon"', - set='status = "queued"', - choose_from_top=50, - ) - + def tick(self, obj: ArchiveResult): print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count()) updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='success') == 1 if not updated: raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object') - # obj.refresh_from_db() - obj.status = 'success' - - def lock(self, obj: ArchiveResult) -> bool: - """As an alternative to self.get_next_atomic(), we can use select_for_update() or manually update a semaphore field here""" - - locked = ArchiveResult.objects.filter(id=obj.id, status='queued').update(status='started') == 1 - if locked: - # obj.refresh_from_db() - obj.status = 'started' - # print(f'FaviconActor[{self.pid}] lock({obj.id}) πŸ”’') - pass - else: - print(f'FaviconActor[{self.pid}] lock({obj.id}) X') - return locked + obj.refresh_from_db() + obj.save() class ExtractorsOrchestrator(Orchestrator): actor_types = { + 'CrawlActor': CrawlActor, + 'SnapshotActor': SnapshotActor, 'FaviconActor': FaviconActor, + 'SinglefileActor': SinglefileActor, } diff --git a/archivebox/actors/statemachine.py b/archivebox/actors/statemachine.py new file mode 100644 index 00000000..53883120 --- /dev/null +++ b/archivebox/actors/statemachine.py @@ -0,0 +1,286 @@ +from statemachine import State, StateMachine +from django.db import models +from multiprocessing import Process +import psutil +import time + +# State Machine Definitions +################################################# + +class SnapshotMachine(StateMachine): + """State machine for managing Snapshot lifecycle.""" + + # States + queued = State(initial=True) + started = State() + sealed = State(final=True) + + # Transitions + start = queued.to(started, cond='can_start') + seal = started.to(sealed, cond='is_finished') + + # Events + tick = ( + queued.to.itself(unless='can_start') | + queued.to(started, cond='can_start') | + 
started.to.itself(unless='is_finished') | + started.to(sealed, cond='is_finished') + ) + + def __init__(self, snapshot): + self.snapshot = snapshot + super().__init__() + + def can_start(self): + return True + + def is_finished(self): + return not self.snapshot.has_pending_archiveresults() + + def before_start(self): + """Pre-start validation and setup.""" + self.snapshot.cleanup_dir() + + def after_start(self): + """Post-start side effects.""" + self.snapshot.create_pending_archiveresults() + self.snapshot.update_indices() + self.snapshot.bump_retry_at(seconds=10) + + def before_seal(self): + """Pre-seal validation and cleanup.""" + self.snapshot.cleanup_dir() + + def after_seal(self): + """Post-seal actions.""" + self.snapshot.update_indices() + self.snapshot.seal_dir() + self.snapshot.upload_dir() + self.snapshot.retry_at = None + self.snapshot.save() + + +class ArchiveResultMachine(StateMachine): + """State machine for managing ArchiveResult lifecycle.""" + + # States + queued = State(initial=True) + started = State() + succeeded = State(final=True) + backoff = State() + failed = State(final=True) + + # Transitions + start = queued.to(started, cond='can_start') + succeed = started.to(succeeded, cond='extractor_succeeded') + backoff = started.to(backoff, unless='extractor_succeeded') + retry = backoff.to(queued, cond='can_retry') + fail = backoff.to(failed, unless='can_retry') + + # Events + tick = ( + queued.to.itself(unless='can_start') | + queued.to(started, cond='can_start') | + started.to.itself(cond='extractor_still_running') | + started.to(succeeded, cond='extractor_succeeded') | + started.to(backoff, unless='extractor_succeeded') | + backoff.to.itself(cond='still_waiting_to_retry') | + backoff.to(queued, cond='can_retry') | + backoff.to(failed, unless='can_retry') + ) + + def __init__(self, archiveresult): + self.archiveresult = archiveresult + super().__init__() + + def can_start(self): + return True + + def extractor_still_running(self): + return self.archiveresult.start_ts > time.now() - timedelta(seconds=5) + + def extractor_succeeded(self): + # return check_if_extractor_succeeded(self.archiveresult) + return self.archiveresult.start_ts < time.now() - timedelta(seconds=5) + + def can_retry(self): + return self.archiveresult.retries < self.archiveresult.max_retries + + def before_start(self): + """Pre-start initialization.""" + self.archiveresult.retries += 1 + self.archiveresult.start_ts = time.now() + self.archiveresult.output = None + self.archiveresult.error = None + + def after_start(self): + """Post-start execution.""" + self.archiveresult.bump_retry_at(seconds=self.archiveresult.timeout + 5) + execute_extractor(self.archiveresult) + self.archiveresult.snapshot.bump_retry_at(seconds=5) + + def before_succeed(self): + """Pre-success validation.""" + self.archiveresult.output = get_archiveresult_output(self.archiveresult) + + def after_succeed(self): + """Post-success cleanup.""" + self.archiveresult.end_ts = time.now() + self.archiveresult.retry_at = None + self.archiveresult.update_indices() + + def before_backoff(self): + """Pre-backoff error capture.""" + self.archiveresult.error = get_archiveresult_error(self.archiveresult) + + def after_backoff(self): + """Post-backoff retry scheduling.""" + self.archiveresult.end_ts = time.now() + self.archiveresult.bump_retry_at( + seconds=self.archiveresult.timeout * self.archiveresult.retries + ) + self.archiveresult.update_indices() + + def before_fail(self): + """Pre-failure finalization.""" + self.archiveresult.retry_at = 
None + + def after_fail(self): + """Post-failure cleanup.""" + self.archiveresult.update_indices() + +# Models +################################################# + +class Snapshot(models.Model): + status = models.CharField(max_length=32, default='queued') + retry_at = models.DateTimeField(null=True) + + @property + def sm(self): + """Get the state machine for this snapshot.""" + return SnapshotMachine(self) + + def has_pending_archiveresults(self): + return self.archiveresult_set.exclude( + status__in=['succeeded', 'failed'] + ).exists() + + def bump_retry_at(self, seconds): + self.retry_at = time.now() + timedelta(seconds=seconds) + self.save() + + def cleanup_dir(self): + cleanup_snapshot_dir(self) + + def create_pending_archiveresults(self): + create_snapshot_pending_archiveresults(self) + + def update_indices(self): + update_snapshot_index_json(self) + update_snapshot_index_html(self) + + def seal_dir(self): + seal_snapshot_dir(self) + + def upload_dir(self): + upload_snapshot_dir(self) + + +class ArchiveResult(models.Model): + snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE) + status = models.CharField(max_length=32, default='queued') + retry_at = models.DateTimeField(null=True) + retries = models.IntegerField(default=0) + max_retries = models.IntegerField(default=3) + timeout = models.IntegerField(default=60) + start_ts = models.DateTimeField(null=True) + end_ts = models.DateTimeField(null=True) + output = models.TextField(null=True) + error = models.TextField(null=True) + + def get_machine(self): + return ArchiveResultMachine(self) + + def bump_retry_at(self, seconds): + self.retry_at = time.now() + timedelta(seconds=seconds) + self.save() + + def update_indices(self): + update_archiveresult_index_json(self) + update_archiveresult_index_html(self) + + +# Actor System +################################################# + +class BaseActor: + MAX_TICK_TIME = 60 + + def tick(self, obj): + """Process a single object through its state machine.""" + machine = obj.get_machine() + + if machine.is_queued: + if machine.can_start(): + machine.start() + + elif machine.is_started: + if machine.can_seal(): + machine.seal() + + elif machine.is_backoff: + if machine.can_retry(): + machine.retry() + else: + machine.fail() + + +class Orchestrator: + """Main orchestrator that manages all actors.""" + + def __init__(self): + self.pid = None + + @classmethod + def spawn(cls): + orchestrator = cls() + proc = Process(target=orchestrator.runloop) + proc.start() + return proc.pid + + def runloop(self): + self.pid = os.getpid() + abx.pm.hook.on_orchestrator_startup(self) + + try: + while True: + self.process_queue(Snapshot) + self.process_queue(ArchiveResult) + time.sleep(0.1) + + except (KeyboardInterrupt, SystemExit): + abx.pm.hook.on_orchestrator_shutdown(self) + + def process_queue(self, model): + retry_at_reached = Q(retry_at__isnull=True) | Q(retry_at__lte=time.now()) + queue = model.objects.filter(retry_at_reached) + + if queue.exists(): + actor = BaseActor() + for obj in queue: + try: + with transaction.atomic(): + actor.tick(obj) + except Exception as e: + abx.pm.hook.on_actor_tick_exception(actor, obj, e) + + +# Periodic Tasks +################################################# + +@djhuey.periodic_task(schedule=djhuey.crontab(minute='*')) +def ensure_orchestrator_running(): + """Ensure orchestrator is running, start if not.""" + if not any(p.name().startswith('Orchestrator') for p in psutil.process_iter()): + Orchestrator.spawn() diff --git a/archivebox/core/actors.py 
b/archivebox/core/actors.py new file mode 100644 index 00000000..30b8245f --- /dev/null +++ b/archivebox/core/actors.py @@ -0,0 +1,73 @@ +__package__ = 'archivebox.core' + +from typing import ClassVar + +from rich import print + +from django.db.models import QuerySet +from django.utils import timezone +from datetime import timedelta +from core.models import Snapshot + +from actors.actor import ActorType + + +class SnapshotActor(ActorType[Snapshot]): + + QUERYSET: ClassVar[QuerySet] = Snapshot.objects.filter(status='queued') + CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue + CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue + CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue + + # model_type: Type[ModelType] + MAX_CONCURRENT_ACTORS: ClassVar[int] = 4 # min 2, max 8, up to 60% of available cpu cores + MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object + + def claim_sql_where(self) -> str: + """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """ + return self.CLAIM_WHERE + + def claim_sql_set(self) -> str: + """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """ + retry_at = timezone.now() + timedelta(seconds=self.MAX_TICK_TIME) + # format as 2024-10-31 10:14:33.240903 + retry_at_str = retry_at.strftime('%Y-%m-%d %H:%M:%S.%f') + return f"{self.CLAIM_SET}, retry_at = '{retry_at_str}'" + + def claim_sql_order(self) -> str: + """override this to implement a custom ORDER BY clause for the atomic claim step e.g.
"created_at DESC" """ + return self.CLAIM_ORDER + + def claim_from_top(self) -> int: + """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue""" + return self.CLAIM_FROM_TOP + + def tick(self, obj: Snapshot) -> None: + """override this to process the object""" + print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', obj.abid or obj.id) + # For example: + # do_some_task(obj) + # do_something_else(obj) + # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success') + # raise NotImplementedError('tick() must be implemented by the Actor subclass') + + def on_shutdown(self, err: BaseException | None=None) -> None: + print(f'[grey53]πŸƒβ€β™‚οΈ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') + # abx.pm.hook.on_actor_shutdown(self) + + def on_tick_start(self, obj: Snapshot) -> None: + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_start(self, obj_to_process) + # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + pass + + def on_tick_end(self, obj: Snapshot) -> None: + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + # self.timer.end() + pass + + def on_tick_exception(self, obj: Snapshot, err: BaseException) -> None: + print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) + # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 79776b7f..a3962a6a 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -8,21 +8,25 @@ import os import json from pathlib import Path +from datetime import timedelta from django.db import models from django.utils.functional import cached_property from django.utils.text import slugify +from django.utils import timezone from django.core.cache import cache from django.urls import reverse, reverse_lazy from django.db.models import Case, When, Value, IntegerField from django.contrib import admin from django.conf import settings +from statemachine.mixins import MachineMixin + from archivebox.config import CONSTANTS from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField from queues.tasks import bg_archive_snapshot -# from crawls.models import Crawl +from crawls.models import Crawl # from machine.models import Machine, NetworkInterface from archivebox.misc.system import get_dir_size @@ -152,7 +156,7 @@ class SnapshotManager(models.Manager): return super().get_queryset().prefetch_related('tags', 'archiveresult_set') # .annotate(archiveresult_count=models.Count('archiveresult')).distinct() -class Snapshot(ABIDModel): +class Snapshot(ABIDModel, MachineMixin): abid_prefix = 'snp_' abid_ts_src = 'self.created_at' abid_uri_src = 'self.url' @@ -160,6 +164,17 @@ class Snapshot(ABIDModel): abid_rand_src = 'self.id' abid_drift_allowed = True + state_field_name = 'status' + state_machine_name = 'core.statemachines.SnapshotMachine' + state_machine_attr = 'sm' + + class SnapshotStatus(models.TextChoices): + QUEUED = 'queued', 'Queued' + STARTED = 'started', 'Started' + SEALED = 'sealed', 'Sealed' + + status = models.CharField(max_length=15, default=SnapshotStatus.QUEUED, null=False, blank=False) + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) @@ -171,7 +186,7 @@ class 
Snapshot(ABIDModel): bookmarked_at = AutoDateTimeField(default=None, null=False, editable=True, db_index=True) downloaded_at = models.DateTimeField(default=None, null=True, editable=False, db_index=True, blank=True) - # crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set') + crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set') url = models.URLField(unique=True, db_index=True) timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False) @@ -396,6 +411,25 @@ class Snapshot(ABIDModel): tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk) self.tags.clear() self.tags.add(*tags_id) + + def has_pending_archiveresults(self) -> bool: + pending_statuses = [ArchiveResult.ArchiveResultStatus.QUEUED, ArchiveResult.ArchiveResultStatus.STARTED] + pending_archiveresults = self.archiveresult_set.filter(status__in=pending_statuses) + return pending_archiveresults.exists() + + def create_pending_archiveresults(self) -> list['ArchiveResult']: + archiveresults = [] + for extractor in EXTRACTORS: + archiveresult, _created = ArchiveResult.objects.get_or_create( + snapshot=self, + extractor=extractor, + status=ArchiveResult.ArchiveResultStatus.QUEUED, + ) + archiveresults.append(archiveresult) + return archiveresults + + def bump_retry_at(self, seconds: int = 10): + self.retry_at = timezone.now() + timedelta(seconds=seconds) # def get_storage_dir(self, create=True, symlink=True) -> Path: @@ -452,6 +486,20 @@ class ArchiveResult(ABIDModel): abid_subtype_src = 'self.extractor' abid_rand_src = 'self.id' abid_drift_allowed = True + + state_field_name = 'status' + state_machine_name = 'core.statemachines.ArchiveResultMachine' + state_machine_attr = 'sm' + + class ArchiveResultStatus(models.TextChoices): + QUEUED = 'queued', 'Queued' + STARTED = 'started', 'Started' + SUCCEEDED = 'succeeded', 'Succeeded' + FAILED = 'failed', 'Failed' + SKIPPED = 'skipped', 'Skipped' + BACKOFF = 'backoff', 'Waiting to retry' + + status = models.CharField(max_length=15, choices=ArchiveResultStatus.choices, default=ArchiveResultStatus.QUEUED, null=False, blank=False) EXTRACTOR_CHOICES = ( ('htmltotext', 'htmltotext'), @@ -469,11 +517,7 @@ class ArchiveResult(ABIDModel): ('title', 'title'), ('wget', 'wget'), ) - STATUS_CHOICES = [ - ("succeeded", "succeeded"), - ("failed", "failed"), - ("skipped", "skipped") - ] + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) @@ -491,7 +535,6 @@ class ArchiveResult(ABIDModel): output = models.CharField(max_length=1024) start_ts = models.DateTimeField(db_index=True) end_ts = models.DateTimeField() - status = models.CharField(max_length=16, choices=STATUS_CHOICES) # the network interface that was used to download this result # uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used') @@ -552,7 +595,15 @@ class ArchiveResult(ABIDModel): return link.canonical_outputs().get(f'{self.extractor}_path') def output_exists(self) -> bool: - return os.access(self.output_path(), os.R_OK) + return os.path.exists(self.output_path()) + + def bump_retry_at(self, seconds: int = 10): + self.retry_at = timezone.now() + timedelta(seconds=seconds) + + def create_output_dir(self): + snap_dir = self.snapshot_dir + snap_dir.mkdir(parents=True, exist_ok=True) + return snap_dir / 
self.output_path() # def get_storage_dir(self, create=True, symlink=True): diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index cdcf867f..e7d673ac 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -64,7 +64,8 @@ INSTALLED_APPS = [ # 'abid_utils', # handles ABID ID creation, handling, and models 'config', # ArchiveBox config settings (loaded as a plugin, don't need to add it here) 'machine', # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc. - 'queues', # handles starting and managing background workers and processes + 'actors', # handles starting and managing background workers and processes (orchestrators and actors) + 'queues', # handles starting and managing background workers and processes (supervisord) 'seeds', # handles Seed model and URL source management 'crawls', # handles Crawl and CrawlSchedule models and management 'personas', # handles Persona and session management diff --git a/archivebox/core/statemachines.py b/archivebox/core/statemachines.py new file mode 100644 index 00000000..a2425d43 --- /dev/null +++ b/archivebox/core/statemachines.py @@ -0,0 +1,115 @@ +__package__ = 'archivebox.snapshots' + +from django.utils import timezone + +from statemachine import State, StateMachine + +from core.models import Snapshot, ArchiveResult + +# State Machine Definitions +################################################# + + +class SnapshotMachine(StateMachine, strict_states=True): + """State machine for managing Snapshot lifecycle.""" + + model: Snapshot + + # States + queued = State(value=Snapshot.SnapshotStatus.QUEUED, initial=True) + started = State(value=Snapshot.SnapshotStatus.STARTED) + sealed = State(value=Snapshot.SnapshotStatus.SEALED, final=True) + + # Tick Event + tick = ( + queued.to.itself(unless='can_start', internal=True) | + queued.to(started, cond='can_start') | + started.to.itself(unless='is_finished', internal=True) | + started.to(sealed, cond='is_finished') + ) + + def __init__(self, snapshot, *args, **kwargs): + self.snapshot = snapshot + super().__init__(snapshot, *args, **kwargs) + + def can_start(self) -> bool: + return self.snapshot.seed and self.snapshot.seed.uri + + def is_finished(self) -> bool: + return not self.snapshot.has_pending_archiveresults() + + def on_started(self): + self.snapshot.create_pending_archiveresults() + self.snapshot.bump_retry_at(seconds=60) + self.snapshot.save() + + def on_sealed(self): + self.snapshot.retry_at = None + self.snapshot.save() + +class ArchiveResultMachine(StateMachine, strict_states=True): + """State machine for managing ArchiveResult lifecycle.""" + + model: ArchiveResult + + # States + queued = State(value=ArchiveResult.ArchiveResultStatus.QUEUED, initial=True) + started = State(value=ArchiveResult.ArchiveResultStatus.STARTED) + backoff = State(value=ArchiveResult.ArchiveResultStatus.BACKOFF) + succeeded = State(value=ArchiveResult.ArchiveResultStatus.SUCCEEDED, final=True) + failed = State(value=ArchiveResult.ArchiveResultStatus.FAILED, final=True) + + # Tick Event + tick = ( + queued.to.itself(unless='can_start', internal=True) | + queued.to(started, cond='can_start') | + started.to.itself(unless='is_finished', internal=True) | + started.to(succeeded, cond='is_succeeded') | + started.to(failed, cond='is_failed') | + started.to(backoff, cond='is_backoff') | + backoff.to.itself(unless='can_start', internal=True) | + backoff.to(started, cond='can_start') | + backoff.to(succeeded, cond='is_succeeded') | + 
backoff.to(failed, cond='is_failed') + ) + + def __init__(self, archiveresult, *args, **kwargs): + self.archiveresult = archiveresult + super().__init__(archiveresult, *args, **kwargs) + + def can_start(self) -> bool: + return self.archiveresult.snapshot and self.archiveresult.snapshot.is_started() + + def is_succeeded(self) -> bool: + return self.archiveresult.output_exists() + + def is_failed(self) -> bool: + return not self.archiveresult.output_exists() + + def is_backoff(self) -> bool: + return self.archiveresult.status == ArchiveResult.ArchiveResultStatus.BACKOFF + + def on_started(self): + self.archiveresult.start_ts = timezone.now() + self.archiveresult.create_output_dir() + self.archiveresult.bump_retry_at(seconds=60) + self.archiveresult.save() + + def on_backoff(self): + self.archiveresult.bump_retry_at(seconds=60) + self.archiveresult.save() + + def on_succeeded(self): + self.archiveresult.end_ts = timezone.now() + self.archiveresult.save() + + def on_failed(self): + self.archiveresult.end_ts = timezone.now() + self.archiveresult.save() + + def after_transition(self, event: str, source: State, target: State): + print(f"after '{event}' from '{source.id}' to '{target.id}'") + # self.archiveresult.save_merkle_index() + # self.archiveresult.save_html_index() + # self.archiveresult.save_json_index() + return "after_transition" diff --git a/archivebox/crawls/actors.py b/archivebox/crawls/actors.py new file mode 100644 index 00000000..f159956e --- /dev/null +++ b/archivebox/crawls/actors.py @@ -0,0 +1,69 @@ +__package__ = 'archivebox.crawls' + +from typing import ClassVar + +from rich import print + +from django.db.models import QuerySet + +from crawls.models import Crawl + +from actors.actor import ActorType + + +class CrawlActor(ActorType[Crawl]): + + QUERYSET: ClassVar[QuerySet] = Crawl.objects.filter(status='queued') + CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue + CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue + CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue + + # model_type: Type[ModelType] + MAX_CONCURRENT_ACTORS: ClassVar[int] = 4 # min 2, max 8, up to 60% of available cpu cores + MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object + + def claim_sql_where(self) -> str: + """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """ + return self.CLAIM_WHERE + + def claim_sql_set(self) -> str: + """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """ + return self.CLAIM_SET + + def claim_sql_order(self) -> str: + """override this to implement a custom ORDER BY clause for the atomic claim step e.g. 
"created_at DESC" """ + return self.CLAIM_ORDER + + def claim_from_top(self) -> int: + """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue""" + return self.CLAIM_FROM_TOP + + def tick(self, obj: Crawl) -> None: + """override this to process the object""" + print(f'[blue]πŸƒβ€β™‚οΈ {self}.tick()[/blue]', obj.abid or obj.id) + # For example: + # do_some_task(obj) + # do_something_else(obj) + # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success') + # raise NotImplementedError('tick() must be implemented by the Actor subclass') + + def on_shutdown(self, err: BaseException | None=None) -> None: + print(f'[grey53]πŸƒβ€β™‚οΈ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') + # abx.pm.hook.on_actor_shutdown(self) + + def on_tick_start(self, obj: Crawl) -> None: + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_start()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_start(self, obj_to_process) + # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + pass + + def on_tick_end(self, obj: Crawl) -> None: + # print(f'πŸƒβ€β™‚οΈ {self}.on_tick_end()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + # self.timer.end() + pass + + def on_tick_exception(self, obj: Crawl, err: BaseException) -> None: + print(f'[red]πŸƒβ€β™‚οΈ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) + # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) diff --git a/archivebox/crawls/models.py b/archivebox/crawls/models.py index a806d889..ff9e0d0a 100644 --- a/archivebox/crawls/models.py +++ b/archivebox/crawls/models.py @@ -1,13 +1,20 @@ __package__ = 'archivebox.crawls' +from typing import TYPE_CHECKING from django_stubs_ext.db.models import TypedModelMeta +from datetime import timedelta + from django.db import models -from django.db.models import Q from django.core.validators import MaxValueValidator, MinValueValidator from django.conf import settings -from django.utils import timezone from django.urls import reverse_lazy +from django.utils import timezone + +from statemachine.mixins import MachineMixin + +if TYPE_CHECKING: + from core.models import Snapshot from seeds.models import Seed @@ -41,8 +48,9 @@ class CrawlSchedule(ABIDModel, ModelWithHealthStats): """The base crawl that each new scheduled job should copy as a template""" return self.crawl_set.first() + -class Crawl(ABIDModel, ModelWithHealthStats): +class Crawl(ABIDModel, ModelWithHealthStats, MachineMixin): """ A single session of URLs to archive starting from a given Seed and expanding outwards. An "archiving session" so to speak. 
@@ -55,16 +63,29 @@ class Crawl(ABIDModel, ModelWithHealthStats): abid_prefix = 'crl_' abid_ts_src = 'self.created_at' abid_uri_src = 'self.seed.uri' - abid_subtype_src = 'self.persona_id' + abid_subtype_src = 'self.persona' abid_rand_src = 'self.id' abid_drift_allowed = True + + state_field_name = 'status' + state_machine_name = 'crawls.statemachines.CrawlMachine' + state_machine_attr = 'sm' + bind_events_as_methods = True + class CrawlStatus(models.TextChoices): + QUEUED = 'queued', 'Queued' + STARTED = 'started', 'Started' + SEALED = 'sealed', 'Sealed' + + status = models.CharField(choices=CrawlStatus.choices, max_length=15, default=CrawlStatus.QUEUED, null=False, blank=False) + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='crawl_set') created_at = AutoDateTimeField(default=None, null=False, db_index=True) modified_at = models.DateTimeField(auto_now=True) + seed = models.ForeignKey(Seed, on_delete=models.PROTECT, related_name='crawl_set', null=False, blank=False) max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)]) @@ -79,7 +100,7 @@ class Crawl(ABIDModel, ModelWithHealthStats): # schedule = models.JSONField() # config = models.JSONField() - # snapshot_set: models.Manager['Snapshot'] + snapshot_set: models.Manager['Snapshot'] class Meta(TypedModelMeta): @@ -102,6 +123,28 @@ class Crawl(ABIDModel, ModelWithHealthStats): @property def api_docs_url(self) -> str: return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl' + + def has_pending_archiveresults(self) -> bool: + from core.models import ArchiveResult + + pending_statuses = [ArchiveResult.ArchiveResultStatus.QUEUED, ArchiveResult.ArchiveResultStatus.STARTED] + + snapshot_ids = self.snapshot_set.values_list('id', flat=True) + pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, status__in=pending_statuses) + return pending_archiveresults.exists() + + def create_root_snapshot(self) -> 'Snapshot': + from core.models import Snapshot + + root_snapshot, _ = Snapshot.objects.get_or_create( + crawl=self, + url=self.seed.uri, + ) + return root_snapshot + + def bump_retry_at(self, seconds: int = 10): + self.retry_at = timezone.now() + timedelta(seconds=seconds) + self.save() class Outlink(models.Model): diff --git a/archivebox/crawls/statemachines.py b/archivebox/crawls/statemachines.py new file mode 100644 index 00000000..b7e43daf --- /dev/null +++ b/archivebox/crawls/statemachines.py @@ -0,0 +1,48 @@ +__package__ = 'archivebox.crawls' + +from statemachine import State, StateMachine + +from crawls.models import Crawl + +# State Machine Definitions +################################################# + + +class CrawlMachine(StateMachine, strict_states=True): + """State machine for managing Crawl lifecycle.""" + + model: Crawl + + # States + queued = State(value=Crawl.CrawlStatus.QUEUED, initial=True) + started = State(value=Crawl.CrawlStatus.STARTED) + sealed = State(value=Crawl.CrawlStatus.SEALED, final=True) + + # Tick Event + tick = ( + queued.to.itself(unless='can_start', internal=True) | + queued.to(started, cond='can_start') | + started.to.itself(unless='is_finished', internal=True) | + started.to(sealed, cond='is_finished') + ) + + def __init__(self, crawl, *args, **kwargs): + self.crawl = crawl + super().__init__(crawl, 
*args, **kwargs) + + def can_start(self) -> bool: + return self.crawl.seed and self.crawl.seed.uri + + def is_finished(self) -> bool: + return not self.crawl.has_pending_archiveresults() + + + + def on_started(self): + self.crawl.create_root_snapshot() + self.crawl.bump_retry_at(seconds=10) + self.crawl.save() + + def on_sealed(self): + self.crawl.retry_at = None + self.crawl.save() diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py index ddfb4236..be6dcd02 100644 --- a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py @@ -29,7 +29,7 @@ def get_EXTRACTORS(): 'singlefile': SINGLEFILE_EXTRACTOR, } -# @abx.hookimpl -# def get_INSTALLED_APPS(): -# # needed to load ./models.py -# return [__package__] +@abx.hookimpl +def get_INSTALLED_APPS(): + # needed to load ./models.py + return [__package__] diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py new file mode 100644 index 00000000..d928d0fd --- /dev/null +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py @@ -0,0 +1,27 @@ +__package__ = 'abx_plugin_singlefile' + +from typing import ClassVar +from django.db.models import QuerySet +from django.utils.functional import classproperty + +from actors.actor import ActorType + +from .models import SinglefileResult + + +class SinglefileActor(ActorType[SinglefileResult]): + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' + CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"' + CLAIM_SET: ClassVar[str] = 'status = "started"' + + @classproperty + def QUERYSET(cls) -> QuerySet: + return SinglefileResult.objects.filter(status='queued') + + def tick(self, obj: SinglefileResult): + print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count()) + updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1 + if not updated: + raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object') + obj.refresh_from_db() + obj.save() diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py index a56649da..05284f37 100644 --- a/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py +++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py @@ -20,6 +20,17 @@ from django.urls import reverse_lazy from pathlib import Path +# Glossary: +# - startup: when a new process is spawned +# - shutdown: when a process is exiting +# - start: at the beginning of some python code block +# - end: at the end of some python code block +# - queue: a django queryset of objects of a single type that are waiting to be processed +# - actor: a long-running daemon process that wakes up and processes a single object from a queue at a time +# - plugin: a python package that defines some hookimpls based on hookspecs exposed by ABX +# - object: an instance of a django model that represents a single row in the database + + # 
ORCHESTRATOR: # An orchestrator is a single long-running daemon process that manages spawning and killing actors for different queues of objects. # The orchestrator first starts when the archivebox starts, and it stops when archivebox is killed. @@ -74,8 +85,8 @@ from pathlib import Path # On startup an actor should fire abx.pm.hook.on_actor_startup(object) and on exit it should fire abx.pm.hook.on_actor_exit(object) (both syncronous hooks that can be used by plugins to register any startup/cleanup code). # An ActorType defines the following hookspecs for plugins to hook into its behavior: # - abx.pm.hook.on_actor_startup(actor, queue) -# - abx.pm.hook.on_actor_tick_started(actor, object) -# - abx.pm.hook.on_actor_tick_finished(actor, object) +# - abx.pm.hook.on_actor_tick_start(actor, object) +# - abx.pm.hook.on_actor_tick_end(actor, object) # - abx.pm.hook.on_actor_tick_exception(actor, object, exception) # - abx.pm.hook.on_actor_shutdown(actor) @@ -107,8 +118,8 @@ from pathlib import Path # - external API calls (e.g. uploading to s3, firing a webhook, writing to a logfile, etc.) # - DO NOT use side effects to directly mutate other objects state or trigger other state transitions # ABX defines the following hookspecs for plugins to hook into transition behavior: -# - abx.pm.hook.on_transition__from_abx_to_xyz_started(object) -# - abx.pm.hook.on_transition__from_abx_to_xyz_succeeded(object) +# - abx.pm.hook.on_transition__from_abx_to_xyz_start(object) +# - abx.pm.hook.on_transition__from_abx_to_xyz_end(object) # READ: # A read() method is a function defined for a given ActorType that performs a single read from the DB and/or other read models like django cache, filesystem, in-memory caches, etc. diff --git a/archivebox/seeds/models.py b/archivebox/seeds/models.py index b0d83b2e..7fe49c83 100644 --- a/archivebox/seeds/models.py +++ b/archivebox/seeds/models.py @@ -1,19 +1,8 @@ __package__ = 'archivebox.seeds' -from datetime import datetime - -from django_stubs_ext.db.models import TypedModelMeta - from django.db import models -from django.db.models import Q -from django.core.validators import MaxValueValidator, MinValueValidator from django.conf import settings -from django.utils import timezone -from django.utils.functional import cached_property -from django.urls import reverse_lazy - -from pathlib import Path from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats @@ -47,7 +36,10 @@ class Seed(ABIDModel, ModelWithHealthStats): abid_rand_src = 'self.id' abid_drift_allowed = True - uri = models.URLField(max_length=255, blank=False, null=False, unique=True) # unique source location where URLs will be loaded from + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') + abid = ABIDField(prefix=abid_prefix) + + uri = models.URLField(max_length=2000, blank=False, null=False) # unique source location where URLs will be loaded from extractor = models.CharField(default='auto', max_length=32) # suggested extractor to use to load this URL source tags_str = models.CharField(max_length=255, null=False, blank=True, default='') # tags to attach to any URLs that come from this source @@ -64,4 +56,10 @@ class Seed(ABIDModel, ModelWithHealthStats): # pocketapi:// # s3:// # etc.. 
- return self.uri.split('://')[0].lower() + return self.uri.split('://', 1)[0].lower() + + class Meta: + verbose_name = 'Seed' + verbose_name_plural = 'Seeds' + + unique_together = (('created_by', 'uri', 'extractor'),) diff --git a/pyproject.toml b/pyproject.toml index e8cec024..aceae950 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ dependencies = [ "pluggy>=1.5.0", "requests>=2.32.3", "dateparser>=1.2.0", - "tzdata>=2024.2", # needed for dateparser {TZ: UTC} on some systems: https://github.com/ArchiveBox/ArchiveBox/issues/1553 + "tzdata>=2024.2", # needed for dateparser {TZ: UTC} on some systems: https://github.com/ArchiveBox/ArchiveBox/issues/1553 "feedparser>=6.0.11", "w3lib>=2.2.1", "rich>=13.8.0", @@ -86,40 +86,35 @@ dependencies = [ "yt-dlp>=2024.8.6", # for: media" ############# Plugin Dependencies ################ "abx>=0.1.0", - "abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-config>=0.1.0", "abx-spec-archivebox>=0.1.0", "abx-spec-django>=0.1.0", "abx-spec-extractor>=0.1.0", "abx-spec-searchbackend>=0.1.0", - "abx-plugin-default-binproviders>=2024.10.24", "abx-plugin-pip>=2024.10.24", "abx-plugin-npm>=2024.10.24", "abx-plugin-playwright>=2024.10.24", "abx-plugin-puppeteer>=2024.10.28", - "abx-plugin-ripgrep-search>=2024.10.28", "abx-plugin-sqlitefts-search>=2024.10.28", "abx-plugin-sonic-search>=2024.10.28", "abx-plugin-ldap-auth>=2024.10.28", - "abx-plugin-curl>=2024.10.27", "abx-plugin-wget>=2024.10.28", "abx-plugin-git>=2024.10.28", "abx-plugin-chrome>=2024.10.28", "abx-plugin-ytdlp>=2024.10.28", - "abx-plugin-title>=2024.10.27", "abx-plugin-favicon>=2024.10.27", # "abx-plugin-headers>=2024.10.27", "abx-plugin-archivedotorg>=2024.10.28", - "abx-plugin-singlefile>=2024.10.28", "abx-plugin-readability>=2024.10.28", "abx-plugin-mercury>=2024.10.28", "abx-plugin-htmltotext>=2024.10.28", + "python-statemachine>=2.3.6", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index b29258ab..761668b7 100644 --- a/uv.lock +++ b/uv.lock @@ -661,6 +661,7 @@ dependencies = [ { name = "pydantic-settings" }, { name = "python-benedict", extra = ["io", "parse"] }, { name = "python-crontab" }, + { name = "python-statemachine" }, { name = "requests" }, { name = "rich" }, { name = "rich-argparse" }, @@ -789,6 +790,7 @@ requires-dist = [ { name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" }, { name = "python-crontab", specifier = ">=3.2.0" }, { name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" }, + { name = "python-statemachine", specifier = ">=2.3.6" }, { name = "requests", specifier = ">=2.32.3" }, { name = "requests-tracker", marker = "extra == 'debug'", specifier = ">=0.3.3" }, { name = "rich", specifier = ">=13.8.0" }, @@ -2729,6 +2731,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/62/02da182e544a51a5c3ccf4b03ab79df279f9c60c5e82d5e8bec7ca26ac11/python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8", size = 10051 }, ] +[[package]] +name = "python-statemachine" +version = "2.3.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/c9/7034a362ce151f9fa0ead5630727a16122f7a5ed235d42447910dff95b6a/python_statemachine-2.3.6.tar.gz", hash = "sha256:9cb4040ca7f2158d3cd46f36a77b420b6ef95a90223928a7f3cab232a70bd560", size = 36735 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/39/04/19a44b549cbaae1ac6c2acc58afb96b71209da866713877f40aab2f45de6/python_statemachine-2.3.6-py3-none-any.whl", hash = "sha256:0001b02cbe2f5b2420c423b5b3e3a33915447ac6d9735219c929e2378d454f5f", size = 41529 }, +] + [[package]] name = "python-stdnum" version = "1.20" From 758c0c677439a8f835210f3037bd563d3e0689d1 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sun, 3 Nov 2024 12:54:04 -0800 Subject: [PATCH 25/25] add user providable PLAYWRIGHT cache dir --- .../abx_plugin_playwright/binproviders.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py index 6bc44815..a0711666 100644 --- a/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py +++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py @@ -25,10 +25,16 @@ import abx from .binaries import PLAYWRIGHT_BINARY - +USER_PLAYWRIGHT_CACHE_DIR: str | None = os.environ.get("PLAYWRIGHT_BROWSERS_PATH", None) MACOS_PLAYWRIGHT_CACHE_DIR: Path = Path("~/Library/Caches/ms-playwright") LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright") +PLAYWRIGHT_CACHE_DIR: Path = Path(USER_PLAYWRIGHT_CACHE_DIR) if USER_PLAYWRIGHT_CACHE_DIR else ( + MACOS_PLAYWRIGHT_CACHE_DIR.expanduser() + if OPERATING_SYSTEM == "darwin" else + LINUX_PLAYWRIGHT_CACHE_DIR.expanduser() +) + class PlaywrightBinProvider(BinProvider): name: BinProviderName = "playwright" @@ -36,11 +42,7 @@ class PlaywrightBinProvider(BinProvider): PATH: PATHStr = f"{Path('/usr/share/abx') / 'bin'}:{DEFAULT_ENV_PATH}" - playwright_browsers_dir: Path = ( - MACOS_PLAYWRIGHT_CACHE_DIR.expanduser() - if OPERATING_SYSTEM == "darwin" else - LINUX_PLAYWRIGHT_CACHE_DIR.expanduser() - ) + playwright_browsers_dir: Path = PLAYWRIGHT_CACHE_DIR playwright_install_args: List[str] = ["install"] packages_handler: BinProviderOverrides = Field(default={ @@ -49,7 +51,6 @@ class PlaywrightBinProvider(BinProvider): _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {} - @computed_field @property def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None: try: