diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 79cc28e7..b1153211 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -102,7 +102,7 @@ jobs:
# TODO: remove this exception for windows once we get tests passing on that platform
if: ${{ !contains(matrix.os, 'windows') }}
run: |
- python -m pytest -s --basetemp=tests/out --ignore=archivebox/vendor --ignore=deb_dist --ignore=pip_dist --ignore=brew_dist
+ python -m pytest -s --basetemp=tests/out --ignore=archivebox/pkgs
docker_tests:
runs-on: ubuntu-latest
diff --git a/.gitmodules b/.gitmodules
index db744b8a..e260fdf5 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,9 +1,3 @@
[submodule "docs"]
path = docs
url = https://github.com/ArchiveBox/ArchiveBox.wiki.git
-[submodule "archivebox/vendor/pocket"]
- path = archivebox/vendor/pocket
- url = https://github.com/tapanpandita/pocket
-[submodule "archivebox/vendor/pydantic-pkgr"]
- path = archivebox/vendor/pydantic-pkgr
- url = https://github.com/ArchiveBox/pydantic-pkgr
diff --git a/archivebox/.flake8 b/archivebox/.flake8
index 01af646d..bb7176bd 100644
--- a/archivebox/.flake8
+++ b/archivebox/.flake8
@@ -3,4 +3,4 @@ ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E
select = F,E9,W
max-line-length = 130
max-complexity = 10
-exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv
+exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv,data,data*
diff --git a/archivebox/__init__.py b/archivebox/__init__.py
index bb2a9806..fd32778c 100755
--- a/archivebox/__init__.py
+++ b/archivebox/__init__.py
@@ -13,8 +13,8 @@ __package__ = 'archivebox'
import os
import sys
-
from pathlib import Path
+from typing import cast
ASCII_LOGO = """
█████╗ ██████╗ ██████╗██╗ ██╗██╗██╗ ██╗███████╗ ██████╗ ██████╗ ██╗ ██╗
@@ -47,11 +47,54 @@ from .monkey_patches import * # noqa
# print('LOADING VENDORED LIBRARIES')
-from .vendor import load_vendored_libs # noqa
-load_vendored_libs()
+from .pkgs import load_vendored_pkgs # noqa
+load_vendored_pkgs()
# print('DONE LOADING VENDORED LIBRARIES')
+# Load ABX Plugin Specifications + Default Implementations
+import abx # noqa
+import abx_spec_archivebox # noqa
+import abx_spec_config # noqa
+import abx_spec_pydantic_pkgr # noqa
+import abx_spec_django # noqa
+import abx_spec_searchbackend # noqa
+abx.pm.add_hookspecs(abx_spec_config.PLUGIN_SPEC)
+abx.pm.register(abx_spec_config.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_pydantic_pkgr.PLUGIN_SPEC)
+abx.pm.register(abx_spec_pydantic_pkgr.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_django.PLUGIN_SPEC)
+abx.pm.register(abx_spec_django.PLUGIN_SPEC())
+
+abx.pm.add_hookspecs(abx_spec_searchbackend.PLUGIN_SPEC)
+abx.pm.register(abx_spec_searchbackend.PLUGIN_SPEC())
+
+# Cast to ArchiveBoxPluginSpec to enable static type checking of pm.hook.call() methods
+abx.pm = cast(abx.ABXPluginManager[abx_spec_archivebox.ArchiveBoxPluginSpec], abx.pm)
+pm = abx.pm
+
+
+# Load all pip-installed ABX-compatible plugins
+ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
+
+# Load all built-in ArchiveBox plugins
+ARCHIVEBOX_BUILTIN_PLUGINS = {
+ 'config': PACKAGE_DIR / 'config',
+ 'core': PACKAGE_DIR / 'core',
+ # 'search': PACKAGE_DIR / 'search',
+ # 'core': PACKAGE_DIR / 'core',
+}
+
+# Load all user-defined ArchiveBox plugins
+USER_PLUGINS = abx.find_plugins_in_dir(Path(os.getcwd()) / 'user_plugins')
+
+# Import all plugins and register them with ABX Plugin Manager
+ALL_PLUGINS = {**ABX_ECOSYSTEM_PLUGINS, **ARCHIVEBOX_BUILTIN_PLUGINS, **USER_PLUGINS}
+LOADED_PLUGINS = abx.load_plugins(ALL_PLUGINS)
+
+# Setup basic config, constants, paths, and version
from .config.constants import CONSTANTS # noqa
from .config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .config.version import VERSION # noqa
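
Note: the new loader above gathers plugins from three sources (pip-installed packages exposing an `abx` entrypoint, the built-in `config`/`core` packages, and a `./user_plugins/` directory in the data dir). As a hedged sketch of what a loadable user plugin might look like, assuming the external `abx` package still re-exports pluggy's `@hookimpl` marker the way the deleted in-tree `abx` package (below) did — names here are illustrative:

```python
# user_plugins/my_plugin/__init__.py  (hypothetical plugin picked up via USER_PLUGINS above)
import abx

__label__ = 'My Example Plugin'

@abx.hookimpl
def get_CONFIG():
    # contribute one extra config section; results from all plugins get merged
    return {'my_plugin': {'MY_EXAMPLE_OPTION': True}}
```
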
diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py
index b1b43c7d..5a9ab109 100644
--- a/archivebox/abid_utils/models.py
+++ b/archivebox/abid_utils/models.py
@@ -175,7 +175,7 @@ class ABIDModel(models.Model):
'uri': self.abid_uri_src,
'subtype': self.abid_subtype_src,
'rand': self.abid_rand_src,
- 'salt': 'self.abid_salt', # defined as static class vars at build time
+ 'salt': 'self.abid_salt', # defined as static class vars at build time
}
@property
diff --git a/archivebox/abx/__init__.py b/archivebox/abx/__init__.py
deleted file mode 100644
index c571a2e3..00000000
--- a/archivebox/abx/__init__.py
+++ /dev/null
@@ -1,131 +0,0 @@
-__package__ = 'abx'
-
-import importlib
-from pathlib import Path
-from typing import Dict, Callable, List
-
-from . import hookspec as base_spec
-from abx.hookspec import hookimpl, hookspec # noqa
-from abx.manager import pm, PluginManager # noqa
-
-
-pm.add_hookspecs(base_spec)
-
-
-###### PLUGIN DISCOVERY AND LOADING ########################################################
-
-def get_plugin_order(plugin_entrypoint: Path):
- order = 999
- try:
- # if .plugin_order file exists, use it to set the load priority
- order = int((plugin_entrypoint.parent / '.plugin_order').read_text())
- except FileNotFoundError:
- pass
- return (order, plugin_entrypoint)
-
-def register_hookspecs(hookspecs: List[str]):
- """
- Register all the hookspecs from a list of module names.
- """
- for hookspec_import_path in hookspecs:
- hookspec_module = importlib.import_module(hookspec_import_path)
- pm.add_hookspecs(hookspec_module)
-
-
-def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
- """
- Find all the plugins in a given directory. Just looks for an __init__.py file.
- """
- return {
- f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent
- for plugin_entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=get_plugin_order)
- if plugin_entrypoint.parent.name != 'abx'
- } # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"
-
-
-def get_pip_installed_plugins(group='abx'):
- """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip"""
- import importlib.metadata
-
- DETECTED_PLUGINS = {} # module_name: module_dir_path
- for dist in list(importlib.metadata.distributions()):
- for entrypoint in dist.entry_points:
- if entrypoint.group != group or pm.is_blocked(entrypoint.name):
- continue
- DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent
- # pm.register(plugin, name=ep.name)
- # pm._plugin_distinfo.append((plugin, DistFacade(dist)))
- return DETECTED_PLUGINS
-
-
-def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
- """
- Get the mapping of dir_name: {plugin_id: plugin_dir} for all plugins in the given directories.
- """
- DETECTED_PLUGINS = {}
- for plugin_prefix, plugin_dir in plugin_dirs.items():
- DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix))
- return DETECTED_PLUGINS
-
-
-# Load all plugins from pip packages, archivebox built-ins, and user plugins
-
-def load_plugins(plugins_dict: Dict[str, Path]):
- """
- Load all the plugins from a dictionary of module names and directory paths.
- """
- LOADED_PLUGINS = {}
- for plugin_module, plugin_dir in plugins_dict.items():
- # print(f'Loading plugin: {plugin_module} from {plugin_dir}')
- plugin_module_loaded = importlib.import_module(plugin_module)
- pm.register(plugin_module_loaded)
- LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN
- # print(f' √ Loaded plugin: {plugin_module}')
- return LOADED_PLUGINS
-
-def get_registered_plugins():
- """
- Get all the plugins registered with Pluggy.
- """
- plugins = {}
- plugin_to_distinfo = dict(pm.list_plugin_distinfo())
- for plugin in pm.get_plugins():
- plugin_info = {
- "name": plugin.__name__,
- "hooks": [h.name for h in pm.get_hookcallers(plugin) or ()],
- }
- distinfo = plugin_to_distinfo.get(plugin)
- if distinfo:
- plugin_info["version"] = distinfo.version
- plugin_info["name"] = (
- getattr(distinfo, "name", None) or distinfo.project_name
- )
- plugins[plugin_info["name"]] = plugin_info
- return plugins
-
-
-
-
-def get_plugin_hooks(plugin_pkg: str | None) -> Dict[str, Callable]:
- """
- Get all the functions marked with @hookimpl on a module.
- """
- if not plugin_pkg:
- return {}
-
- hooks = {}
-
- plugin_module = importlib.import_module(plugin_pkg)
- for attr_name in dir(plugin_module):
- if attr_name.startswith('_'):
- continue
- try:
- attr = getattr(plugin_module, attr_name)
- if isinstance(attr, Callable):
- hooks[attr_name] = None
- pm.parse_hookimpl_opts(plugin_module, attr_name)
- hooks[attr_name] = attr
- except Exception as e:
- print(f'Error getting hookimpls for {plugin_pkg}: {e}')
-
- return hooks
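
For reference, the entrypoint discovery that `get_pip_installed_plugins()` performed (and which the new external `abx` package presumably reimplements) reduces to scanning the `abx` entry-point group. A standalone sketch, assuming Python 3.10+ for the `group=` keyword:

```python
from importlib.metadata import entry_points
from pathlib import Path

def find_abx_plugins(group: str = 'abx') -> dict:
    """Map plugin name -> package dir for each pip package exposing an `abx` entrypoint."""
    detected = {}
    for entrypoint in entry_points(group=group):
        plugin_module = entrypoint.load()                        # imports the plugin package
        detected[entrypoint.name] = Path(plugin_module.__file__).parent
    return detected
```
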
diff --git a/archivebox/abx/archivebox/__init__.py b/archivebox/abx/archivebox/__init__.py
deleted file mode 100644
index 58bbb447..00000000
--- a/archivebox/abx/archivebox/__init__.py
+++ /dev/null
@@ -1,30 +0,0 @@
-__package__ = 'abx.archivebox'
-
-import os
-import importlib
-
-from typing import Dict
-from pathlib import Path
-
-
-def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]):
- """Load archivebox plugins, very similar to abx.load_plugins but it looks for a pydantic PLUGIN model + hooks in apps.py"""
- LOADED_PLUGINS = {}
- for plugin_module, plugin_dir in reversed(plugins_dict.items()):
- # print(f'Loading plugin: {plugin_module} from {plugin_dir}')
-
-        # 1. register the plugin module directly in case it contains any loose hookimpls (e.g. in __init__.py)
- try:
- plugin_module_loaded = importlib.import_module(plugin_module)
- pm.register(plugin_module_loaded)
- except Exception as e:
- print(f'Error registering plugin: {plugin_module} - {e}')
-
-
- # 2. then try to import plugin_module.apps as well
- if os.access(plugin_dir / 'apps.py', os.R_OK):
- plugin_apps = importlib.import_module(plugin_module + '.apps')
- pm.register(plugin_apps) # register the whole .apps in case it contains loose hookimpls (not in a class)
-
- # print(f' √ Loaded plugin: {plugin_module} {len(archivebox_plugins_found) * "🧩"}')
- return LOADED_PLUGINS
diff --git a/archivebox/abx/archivebox/base_binary.py b/archivebox/abx/archivebox/base_binary.py
deleted file mode 100644
index 7890c05b..00000000
--- a/archivebox/abx/archivebox/base_binary.py
+++ /dev/null
@@ -1,106 +0,0 @@
-__package__ = "abx.archivebox"
-
-import os
-from typing import Optional, cast
-from typing_extensions import Self
-
-from pydantic import validate_call
-from pydantic_pkgr import (
- Binary,
- BinProvider,
- BinProviderName,
- AptProvider,
- BrewProvider,
- EnvProvider,
-)
-
-from archivebox.config.permissions import ARCHIVEBOX_USER
-
-
-class BaseBinProvider(BinProvider):
-
- # TODO: add install/load/load_or_install methods as abx.hookimpl methods
-
- @property
- def admin_url(self) -> str:
- # e.g. /admin/environment/binproviders/NpmBinProvider/ TODO
- return "/admin/environment/binaries/"
-
-class BaseBinary(Binary):
-
- @staticmethod
- def symlink_to_lib(binary, bin_dir=None) -> None:
- from archivebox.config.common import STORAGE_CONFIG
- bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
-
- if not (binary.abspath and os.access(binary.abspath, os.R_OK)):
- return
-
- try:
- bin_dir.mkdir(parents=True, exist_ok=True)
- symlink = bin_dir / binary.name
- symlink.unlink(missing_ok=True)
- symlink.symlink_to(binary.abspath)
- symlink.chmod(0o777) # make sure its executable by everyone
- except Exception as err:
- # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
- # not actually needed, we can just run without it
- pass
-
- @validate_call
- def load(self, fresh=False, **kwargs) -> Self:
- from archivebox.config.common import STORAGE_CONFIG
- if fresh:
- binary = super().load(**kwargs)
- self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
- else:
- # get cached binary from db
- try:
- from machine.models import InstalledBinary
- installed_binary = InstalledBinary.objects.get_from_db_or_cache(self) # type: ignore
- binary = InstalledBinary.load_from_db(installed_binary)
- except Exception:
- # maybe we are not in a DATA dir so there is no db, fallback to reading from fs
- # (e.g. when archivebox version is run outside of a DATA dir)
- binary = super().load(**kwargs)
- return cast(Self, binary)
-
- @validate_call
- def install(self, **kwargs) -> Self:
- from archivebox.config.common import STORAGE_CONFIG
- binary = super().install(**kwargs)
- self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
- return binary
-
- @validate_call
- def load_or_install(self, fresh=False, **kwargs) -> Self:
- from archivebox.config.common import STORAGE_CONFIG
- try:
- binary = self.load(fresh=fresh)
- if binary and binary.version:
- self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
- return binary
- except Exception:
- pass
- return self.install(**kwargs)
-
- @property
- def admin_url(self) -> str:
- # e.g. /admin/environment/config/LdapConfig/
- return f"/admin/environment/binaries/{self.name}/"
-
-
-class AptBinProvider(AptProvider, BaseBinProvider):
- name: BinProviderName = "apt"
-
-class BrewBinProvider(BrewProvider, BaseBinProvider):
- name: BinProviderName = "brew"
-
-class EnvBinProvider(EnvProvider, BaseBinProvider):
- name: BinProviderName = "env"
-
- euid: Optional[int] = ARCHIVEBOX_USER
-
-apt = AptBinProvider()
-brew = BrewBinProvider()
-env = EnvBinProvider()
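
`BaseBinary` was a thin wrapper around `pydantic_pkgr` that added DB caching and symlinking into `LIB_DIR/bin`. The underlying flow it wrapped looks roughly like the sketch below; `Binary.load()` matches its usage in the deleted code above, but treat the exact field names as assumptions:

```python
from pydantic_pkgr import Binary, EnvProvider

# declare a binary and the providers allowed to supply it
wget = Binary(name='wget', binproviders_supported=[EnvProvider()])

loaded = wget.load()                     # detect abspath + version on the host $PATH
print(loaded.abspath, loaded.version)
```
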
diff --git a/archivebox/abx/archivebox/base_extractor.py b/archivebox/abx/archivebox/base_extractor.py
deleted file mode 100644
index f78921e0..00000000
--- a/archivebox/abx/archivebox/base_extractor.py
+++ /dev/null
@@ -1,219 +0,0 @@
-__package__ = 'abx.archivebox'
-
-import json
-import os
-
-from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
-from typing_extensions import Self
-from pathlib import Path
-
-from pydantic import model_validator, AfterValidator
-from pydantic_pkgr import BinName
-from django.utils.functional import cached_property
-from django.utils import timezone
-
-import abx
-
-from .base_binary import BaseBinary
-
-
-def no_empty_args(args: List[str]) -> List[str]:
- assert all(len(arg) for arg in args)
- return args
-
-ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str
-
-HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))]
-CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)]
-
-
-class BaseExtractor:
-
- name: ExtractorName
- binary: BinName
-
- output_path_func: HandlerFuncStr = 'self.get_output_path'
- should_extract_func: HandlerFuncStr = 'self.should_extract'
- extract_func: HandlerFuncStr = 'self.extract'
- exec_func: HandlerFuncStr = 'self.exec'
-
- default_args: CmdArgsList = []
- extra_args: CmdArgsList = []
- args: Optional[CmdArgsList] = None
-
- @model_validator(mode='after')
- def validate_model(self) -> Self:
- if self.args is None:
- self.args = [*self.default_args, *self.extra_args]
- return self
-
-
- def get_output_path(self, snapshot) -> Path:
- return Path(self.__class__.__name__.lower())
-
- def should_extract(self, uri: str, config: dict | None=None) -> bool:
- try:
- assert self.detect_installed_binary().version
-        except Exception:
-            # could not load the binary,
-            # so there is nothing to extract with
-            return False
-
- # output_dir = self.get_output_path(snapshot)
- # if output_dir.glob('*.*'):
- # return False
- return True
-
- @abx.hookimpl
- def extract(self, snapshot_id: str) -> Dict[str, Any]:
- from core.models import Snapshot
- from archivebox import CONSTANTS
-
- snapshot = Snapshot.objects.get(id=snapshot_id)
-
- if not self.should_extract(snapshot):
- return {}
-
- status = 'failed'
- start_ts = timezone.now()
- uplink = self.detect_network_interface()
- installed_binary = self.detect_installed_binary()
- machine = installed_binary.machine
- assert uplink.machine == installed_binary.machine # it would be *very* weird if this wasn't true
-
- output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid)
- output_dir.mkdir(parents=True, exist_ok=True)
-
- # execute the extractor binary with the given args
- args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
- cmd = [str(installed_binary.abspath), *args]
- proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir)
-
- # collect the output
- end_ts = timezone.now()
- output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*'))
- stdout = proc.stdout.strip()
- stderr = proc.stderr.strip()
- output_json = None
- output_text = stdout
- try:
- output_json = json.loads(stdout.strip())
- output_text = None
- except json.JSONDecodeError:
- pass
-
- errors = []
- if proc.returncode == 0:
- status = 'success'
- else:
- errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}')
-
- # increment health stats counters
- if status == 'success':
- machine.record_health_success()
- uplink.record_health_success()
- installed_binary.record_health_success()
- else:
- machine.record_health_failure()
- uplink.record_health_failure()
- installed_binary.record_health_failure()
-
- return {
- 'extractor': self.name,
-
- 'snapshot': {
- 'id': snapshot.id,
- 'abid': snapshot.abid,
- 'url': snapshot.url,
- 'created_by_id': snapshot.created_by_id,
- },
-
- 'machine': {
- 'id': machine.id,
- 'abid': machine.abid,
- 'guid': machine.guid,
- 'hostname': machine.hostname,
- 'hw_in_docker': machine.hw_in_docker,
- 'hw_in_vm': machine.hw_in_vm,
- 'hw_manufacturer': machine.hw_manufacturer,
- 'hw_product': machine.hw_product,
- 'hw_uuid': machine.hw_uuid,
- 'os_arch': machine.os_arch,
- 'os_family': machine.os_family,
- 'os_platform': machine.os_platform,
- 'os_release': machine.os_release,
- 'os_kernel': machine.os_kernel,
- },
-
- 'uplink': {
- 'id': uplink.id,
- 'abid': uplink.abid,
- 'mac_address': uplink.mac_address,
- 'ip_public': uplink.ip_public,
- 'ip_local': uplink.ip_local,
- 'dns_server': uplink.dns_server,
- 'hostname': uplink.hostname,
- 'iface': uplink.iface,
- 'isp': uplink.isp,
- 'city': uplink.city,
- 'region': uplink.region,
- 'country': uplink.country,
- },
-
- 'binary': {
- 'id': installed_binary.id,
- 'abid': installed_binary.abid,
- 'name': installed_binary.name,
- 'binprovider': installed_binary.binprovider,
- 'abspath': installed_binary.abspath,
- 'version': installed_binary.version,
- 'sha256': installed_binary.sha256,
- },
-
- 'cmd': cmd,
- 'stdout': stdout,
- 'stderr': stderr,
- 'returncode': proc.returncode,
- 'start_ts': start_ts,
- 'end_ts': end_ts,
-
- 'status': status,
- 'errors': errors,
- 'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
- 'output_files': output_files,
- 'output_json': output_json or {},
- 'output_text': output_text or '',
- }
-
- # TODO: move this to a hookimpl
- def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
- cwd = cwd or Path(os.getcwd())
- binary = self.load_binary(installed_binary=installed_binary)
-
- return binary.exec(cmd=args, cwd=cwd)
-
- @cached_property
- def BINARY(self) -> BaseBinary:
- import abx.archivebox.reads
- for binary in abx.archivebox.reads.get_BINARIES().values():
- if binary.name == self.binary:
- return binary
- raise ValueError(f'Binary {self.binary} not found')
-
- def detect_installed_binary(self):
- from machine.models import InstalledBinary
- # hydrates binary from DB/cache if record of installed version is recent enough
- # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
- return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)
-
- def load_binary(self, installed_binary=None) -> BaseBinary:
- installed_binary = installed_binary or self.detect_installed_binary()
- return installed_binary.load_from_db()
-
- def detect_network_interface(self):
- from machine.models import NetworkInterface
- return NetworkInterface.objects.current()
-
- @abx.hookimpl
- def get_EXTRACTORS(self):
- return [self]
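
The heart of the deleted `extract()` method is a run-and-normalize pattern: run the binary, capture stdout, try to parse it as JSON, fall back to plain text, and derive `status` from the exit code. Distilled into a standalone sketch using plain `subprocess` (not the ArchiveBox API):

```python
import json
import subprocess

def run_extractor(cmd: list, cwd: str) -> dict:
    proc = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
    stdout = proc.stdout.strip()
    try:
        output_json, output_text = json.loads(stdout), None      # structured output
    except json.JSONDecodeError:
        output_json, output_text = None, stdout                  # plain-text output
    return {
        'returncode': proc.returncode,
        'status': 'success' if proc.returncode == 0 else 'failed',
        'output_json': output_json or {},
        'output_text': output_text or '',
    }
```
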
diff --git a/archivebox/abx/archivebox/base_replayer.py b/archivebox/abx/archivebox/base_replayer.py
deleted file mode 100644
index 097a9e94..00000000
--- a/archivebox/abx/archivebox/base_replayer.py
+++ /dev/null
@@ -1,25 +0,0 @@
-__package__ = 'abx.archivebox'
-
-import abx
-
-
-class BaseReplayer:
- """Describes how to render an ArchiveResult in several contexts"""
-
- url_pattern: str = '*'
-
- row_template: str = 'plugins/generic_replayer/templates/row.html'
- embed_template: str = 'plugins/generic_replayer/templates/embed.html'
- fullpage_template: str = 'plugins/generic_replayer/templates/fullpage.html'
-
- # row_view: LazyImportStr = 'plugins.generic_replayer.views.row_view'
- # embed_view: LazyImportStr = 'plugins.generic_replayer.views.embed_view'
- # fullpage_view: LazyImportStr = 'plugins.generic_replayer.views.fullpage_view'
- # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
- # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
-
- @abx.hookimpl
- def get_REPLAYERS(self):
- return [self]
-
- # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc...
diff --git a/archivebox/abx/archivebox/base_searchbackend.py b/archivebox/abx/archivebox/base_searchbackend.py
deleted file mode 100644
index 72713ab8..00000000
--- a/archivebox/abx/archivebox/base_searchbackend.py
+++ /dev/null
@@ -1,25 +0,0 @@
-__package__ = 'abx.archivebox'
-
-from typing import Iterable, List
-import abc
-
-
-
-class BaseSearchBackend(abc.ABC):
- name: str
-
- @staticmethod
- @abc.abstractmethod
- def index(snapshot_id: str, texts: List[str]):
- return
-
- @staticmethod
- @abc.abstractmethod
- def flush(snapshot_ids: Iterable[str]):
- return
-
- @staticmethod
- @abc.abstractmethod
- def search(text: str) -> List[str]:
- raise NotImplementedError("search method must be implemented by subclass")
-
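
To illustrate the contract those abstract methods defined, here is a hypothetical minimal in-memory implementation of the interface above (not a real ArchiveBox backend):

```python
from typing import Dict, Iterable, List

class InMemorySearchBackend(BaseSearchBackend):        # the ABC from the deleted file above
    name: str = 'memory'
    _index: Dict[str, str] = {}

    @staticmethod
    def index(snapshot_id: str, texts: List[str]):
        InMemorySearchBackend._index[snapshot_id] = ' '.join(texts)

    @staticmethod
    def flush(snapshot_ids: Iterable[str]):
        for snapshot_id in snapshot_ids:
            InMemorySearchBackend._index.pop(snapshot_id, None)

    @staticmethod
    def search(text: str) -> List[str]:
        return [sid for sid, blob in InMemorySearchBackend._index.items() if text in blob]
```
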
diff --git a/archivebox/abx/archivebox/hookspec.py b/archivebox/abx/archivebox/hookspec.py
deleted file mode 100644
index bfcb93b8..00000000
--- a/archivebox/abx/archivebox/hookspec.py
+++ /dev/null
@@ -1,52 +0,0 @@
-__package__ = 'abx.archivebox'
-
-from typing import Dict, Any
-
-from .. import hookspec
-
-from .base_binary import BaseBinary, BaseBinProvider
-from .base_configset import BaseConfigSet
-from .base_extractor import BaseExtractor
-from .base_searchbackend import BaseSearchBackend
-
-
-@hookspec
-def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
- return {}
-
-@hookspec
-def get_CONFIG() -> Dict[str, BaseConfigSet]:
- return {}
-
-
-
-@hookspec
-def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
- return {}
-
-@hookspec
-def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
- return {}
-
-# @hookspec
-# def get_REPLAYERS() -> Dict[str, BaseReplayer]:
-# return {}
-
-# @hookspec
-# def get_ADMINDATAVIEWS():
-# return {}
-
-# @hookspec
-# def get_QUEUES():
-# return {}
-
-
-##############################################################
-# provided by abx.pydantic_pkgr.hookspec:
-# @hookspec
-# def get_BINARIES() -> Dict[str, BaseBinary]:
-# return {}
-
-# @hookspec
-# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
-# return {}
diff --git a/archivebox/abx/archivebox/reads.py b/archivebox/abx/archivebox/reads.py
deleted file mode 100644
index d2877ac5..00000000
--- a/archivebox/abx/archivebox/reads.py
+++ /dev/null
@@ -1,160 +0,0 @@
-__package__ = 'abx.archivebox'
-
-import importlib
-from typing import Dict, Set, Any, TYPE_CHECKING
-
-from benedict import benedict
-
-import abx
-from .. import pm
-
-if TYPE_CHECKING:
- from .base_configset import BaseConfigSet
- from .base_binary import BaseBinary, BaseBinProvider
- from .base_extractor import BaseExtractor
- from .base_searchbackend import BaseSearchBackend
- # from .base_replayer import BaseReplayer
- # from .base_queue import BaseQueue
- # from .base_admindataview import BaseAdminDataView
-
-# API exposed to ArchiveBox code
-
-def get_PLUGINS() -> Dict[str, Dict[str, Any]]:
- return benedict({
- plugin_id: plugin
- for plugin_dict in pm.hook.get_PLUGIN()
- for plugin_id, plugin in plugin_dict.items()
- })
-
-def get_PLUGIN(plugin_id: str) -> Dict[str, Any]:
- plugin_info = get_PLUGINS().get(plugin_id, {})
- package = plugin_info.get('package', plugin_info.get('PACKAGE', None))
- if not package:
- return {'id': plugin_id, 'hooks': {}}
- module = importlib.import_module(package)
- hooks = abx.get_plugin_hooks(module.__package__)
- assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks)
-
- return benedict({
- 'id': plugin_id,
- 'label': getattr(module, '__label__', plugin_id),
- 'module': module,
- 'package': module.__package__,
- 'hooks': hooks,
- 'version': getattr(module, '__version__', '999.999.999'),
- 'author': getattr(module, '__author__', 'Unknown'),
- 'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'),
- 'dependencies': getattr(module, '__dependencies__', []),
- 'source_code': module.__file__,
- **plugin_info,
- })
-
-
-def get_HOOKS() -> Set[str]:
- return {
- hook_name
- for plugin_id in get_PLUGINS().keys()
- for hook_name in get_PLUGIN(plugin_id).hooks
- }
-
-def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
- return benedict({
- config_id: configset
- for plugin_configs in pm.hook.get_CONFIG()
- for config_id, configset in plugin_configs.items()
- })
-
-
-def get_FLAT_CONFIG() -> Dict[str, Any]:
- return benedict({
- key: value
- for configset in get_CONFIGS().values()
- for key, value in configset.model_dump().items()
- })
-
-def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']:
- # TODO: move these to plugins
- from abx.archivebox.base_binary import apt, brew, env
- builtin_binproviders = {
- 'env': env,
- 'apt': apt,
- 'brew': brew,
- }
-
- return benedict({
- binprovider_id: binprovider
- for plugin_binproviders in [builtin_binproviders, *pm.hook.get_BINPROVIDERS()]
- for binprovider_id, binprovider in plugin_binproviders.items()
- })
-
-def get_BINARIES() -> Dict[str, 'BaseBinary']:
- return benedict({
- binary_id: binary
- for plugin_binaries in pm.hook.get_BINARIES()
- for binary_id, binary in plugin_binaries.items()
- })
-
-def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']:
- return benedict({
- extractor_id: extractor
- for plugin_extractors in pm.hook.get_EXTRACTORS()
- for extractor_id, extractor in plugin_extractors.items()
- })
-
-# def get_REPLAYERS() -> Dict[str, 'BaseReplayer']:
-# return benedict({
-# replayer.id: replayer
-# for plugin_replayers in pm.hook.get_REPLAYERS()
-# for replayer in plugin_replayers
-# })
-
-# def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']:
-# return benedict({
-# admin_dataview.id: admin_dataview
-# for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS()
-# for admin_dataview in plugin_admin_dataviews
-# })
-
-# def get_QUEUES() -> Dict[str, 'BaseQueue']:
-# return benedict({
-# queue.id: queue
-# for plugin_queues in pm.hook.get_QUEUES()
-# for queue in plugin_queues
-# })
-
-def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
- return benedict({
- searchbackend_id: searchbackend
- for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS()
- for searchbackend_id,searchbackend in plugin_searchbackends.items()
- })
-
-
-
-def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
- """Get all the relevant config for the given scope, in correct precedence order"""
-
- from django.conf import settings
- default_config: benedict = defaults or settings.CONFIG
-
- snapshot = snapshot or (archiveresult and archiveresult.snapshot)
- crawl = crawl or (snapshot and snapshot.crawl)
- seed = seed or (crawl and crawl.seed)
- persona = persona or (crawl and crawl.persona)
-
- persona_config = persona.config if persona else {}
- seed_config = seed.config if seed else {}
- crawl_config = crawl.config if crawl else {}
- snapshot_config = snapshot.config if snapshot else {}
- archiveresult_config = archiveresult.config if archiveresult else {}
- extra_config = extra_config or {}
-
- return {
- **default_config, # defaults / config file / environment variables
- **persona_config, # lowest precedence
- **seed_config,
- **crawl_config,
- **snapshot_config,
- **archiveresult_config,
- **extra_config, # highest precedence
- }
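
The precedence chain in `get_scope_config()` relies on Python dict-merge semantics: later entries overwrite earlier ones. On toy data:

```python
default_config = {'TIMEOUT': 60, 'SAVE_WGET': True}    # defaults / config file / env vars
crawl_config   = {'TIMEOUT': 120}                      # set on one specific crawl
extra_config   = {'SAVE_WGET': False}                  # highest precedence

merged = {**default_config, **crawl_config, **extra_config}
assert merged == {'TIMEOUT': 120, 'SAVE_WGET': False}
```
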
diff --git a/archivebox/abx/django/__init__.py b/archivebox/abx/django/__init__.py
deleted file mode 100644
index 56fe8ddd..00000000
--- a/archivebox/abx/django/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__package__ = 'abx.django'
diff --git a/archivebox/abx/django/apps.py b/archivebox/abx/django/apps.py
deleted file mode 100644
index 085647c1..00000000
--- a/archivebox/abx/django/apps.py
+++ /dev/null
@@ -1,13 +0,0 @@
-__package__ = 'abx.django'
-
-from django.apps import AppConfig
-
-
-class ABXConfig(AppConfig):
- name = 'abx'
-
- def ready(self):
- import abx
- from django.conf import settings
-
- abx.pm.hook.ready(settings=settings)
diff --git a/archivebox/abx/django/hookspec.py b/archivebox/abx/django/hookspec.py
deleted file mode 100644
index 87f8e520..00000000
--- a/archivebox/abx/django/hookspec.py
+++ /dev/null
@@ -1,125 +0,0 @@
-__package__ = 'abx.django'
-
-from ..hookspec import hookspec
-
-
-###########################################################################################
-
-@hookspec
-def get_INSTALLED_APPS():
- """Return a list of apps to add to INSTALLED_APPS"""
- # e.g. ['your_plugin_type.plugin_name']
- return []
-
-# @hookspec
-# def register_INSTALLED_APPS(INSTALLED_APPS):
-# """Mutate INSTALLED_APPS in place to add your app in a specific position"""
-# # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth')
-# # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name')
-# pass
-
-
-@hookspec
-def get_TEMPLATE_DIRS():
- return [] # e.g. ['your_plugin_type/plugin_name/templates']
-
-# @hookspec
-# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
-# """Install django settings"""
-# # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates')
-# pass
-
-
-@hookspec
-def get_STATICFILES_DIRS():
- return [] # e.g. ['your_plugin_type/plugin_name/static']
-
-# @hookspec
-# def register_STATICFILES_DIRS(STATICFILES_DIRS):
-# """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
-# # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
-# pass
-
-
-@hookspec
-def get_MIDDLEWARE():
- return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
-
-# @hookspec
-# def register_MIDDLEWARE(MIDDLEWARE):
-# """Mutate MIDDLEWARE in place to add your middleware in a specific position"""
-# # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
-# pass
-
-
-@hookspec
-def get_AUTHENTICATION_BACKENDS():
- return [] # e.g. ['django_auth_ldap.backend.LDAPBackend']
-
-# @hookspec
-# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
-# """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
-# # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
-# pass
-
-@hookspec
-def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME):
- return [] # e.g. [{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}]
-
-# @hookspec
-# def register_DJANGO_HUEY(DJANGO_HUEY):
-# """Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
-# # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
-# pass
-
-
-@hookspec
-def get_ADMIN_DATA_VIEWS_URLS():
- return []
-
-# @hookspec
-# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
-# """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
-# # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
-# pass
-
-
-# @hookspec
-# def register_settings(settings):
-# """Mutate settings in place to add your settings / modify existing settings"""
-# # settings.SOME_KEY = 'some_value'
-# pass
-
-
-###########################################################################################
-
-@hookspec
-def get_urlpatterns():
- return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
-
-# @hookspec
-# def register_urlpatterns(urlpatterns):
-# """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
-# # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
-# pass
-
-###########################################################################################
-
-@hookspec
-def register_checks():
- """Register django checks with django system checks system"""
- pass
-
-@hookspec
-def register_admin(admin_site):
- """Register django admin views/models with the main django admin site instance"""
- pass
-
-
-###########################################################################################
-
-
-@hookspec
-def ready():
- """Called when Django apps app.ready() are triggered"""
- pass
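
A plugin-side implementation of these specs was just a module of matching `@hookimpl` functions, along these lines (a sketch; the app label and middleware path are made up):

```python
import abx

@abx.hookimpl
def get_INSTALLED_APPS():
    return ['abx_plugin_example']                      # hypothetical Django app label

@abx.hookimpl
def get_MIDDLEWARE():
    return ['abx_plugin_example.middleware.ExampleMiddleware']
```
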
diff --git a/archivebox/abx/django/use.py b/archivebox/abx/django/use.py
deleted file mode 100644
index a52ada3b..00000000
--- a/archivebox/abx/django/use.py
+++ /dev/null
@@ -1,101 +0,0 @@
-__package__ = 'abx.django'
-
-import itertools
-# from benedict import benedict
-
-from .. import pm
-
-
-def get_INSTALLED_APPS():
- return itertools.chain(*reversed(pm.hook.get_INSTALLED_APPS()))
-
-# def register_INSTALLED_APPS(INSTALLED_APPS):
-# pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS)
-
-
-def get_MIDDLEWARES():
- return itertools.chain(*reversed(pm.hook.get_MIDDLEWARE()))
-
-# def register_MIDDLEWARES(MIDDLEWARE):
-# pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE)
-
-
-def get_AUTHENTICATION_BACKENDS():
- return itertools.chain(*reversed(pm.hook.get_AUTHENTICATION_BACKENDS()))
-
-# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
-# pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
-
-
-def get_STATICFILES_DIRS():
- return itertools.chain(*reversed(pm.hook.get_STATICFILES_DIRS()))
-
-# def register_STATICFILES_DIRS(STATICFILES_DIRS):
-# pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS)
-
-
-def get_TEMPLATE_DIRS():
- return itertools.chain(*reversed(pm.hook.get_TEMPLATE_DIRS()))
-
-# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
-# pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS)
-
-def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'):
- HUEY_QUEUES = {}
- for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME):
- HUEY_QUEUES.update(plugin_result)
- return HUEY_QUEUES
-
-# def register_DJANGO_HUEY(DJANGO_HUEY):
-# pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY)
-
-def get_ADMIN_DATA_VIEWS_URLS():
- return itertools.chain(*reversed(pm.hook.get_ADMIN_DATA_VIEWS_URLS()))
-
-# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
-# pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
-
-
-# def register_settings(settings):
-# # convert settings dict to an benedict so we can set values using settings.attr = xyz notation
-# settings_as_obj = benedict(settings, keypath_separator=None)
-
-# # set default values for settings that are used by plugins
-# # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
-# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
-# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
-# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
-# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
-# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
-# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})
-
-# # # call all the hook functions to mutate the settings values in-place
-#     # register_INSTALLED_APPS(settings_as_obj.INSTALLED_APPS)
-# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE)
-# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
-# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
-# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
-# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
-# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)
-
-# # calls Plugin.settings(settings) on each registered plugin
-# pm.hook.register_settings(settings=settings_as_obj)
-
-# # then finally update the settings globals() object will all the new settings
-# # settings.update(settings_as_obj)
-
-
-def get_urlpatterns():
-    return list(itertools.chain(*pm.hook.get_urlpatterns()))
-
-def register_urlpatterns(urlpatterns):
- pm.hook.register_urlpatterns(urlpatterns=urlpatterns)
-
-
-def register_checks():
- """register any django system checks"""
- pm.hook.register_checks()
-
-def register_admin(admin_site):
- """register any django admin models/views with the main django admin site instance"""
- pm.hook.register_admin(admin_site=admin_site)
diff --git a/archivebox/abx/hookspec.py b/archivebox/abx/hookspec.py
deleted file mode 100644
index a25f7673..00000000
--- a/archivebox/abx/hookspec.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from pathlib import Path
-
-from pluggy import HookimplMarker
-from pluggy import HookspecMarker
-
-spec = hookspec = HookspecMarker("abx")
-impl = hookimpl = HookimplMarker("abx")
-
-
-@hookspec
-@hookimpl
-def get_system_user() -> str:
- # Beware $HOME may not match current EUID, UID, PUID, SUID, there are edge cases
-    # - sudo (EUID != UID != SUID)
-    # - running with an autodetected UID based on data dir ownership
-    #   but mapping of UID:username is broken because it was created
-    #   by a different host system, e.g. 911's $HOME outside of docker
-    #   might be /usr/lib/lxd instead of /home/archivebox
-    # - running as a user that doesn't have a home directory
-    # - home directory is set to a path that doesn't exist, or is inside a dir we can't read
- return Path('~').expanduser().name
-
diff --git a/archivebox/abx/manager.py b/archivebox/abx/manager.py
deleted file mode 100644
index 8d44a087..00000000
--- a/archivebox/abx/manager.py
+++ /dev/null
@@ -1,30 +0,0 @@
-import inspect
-
-import pluggy
-
-
-class PluginManager(pluggy.PluginManager):
- """
- Patch to fix pluggy's PluginManager to work with pydantic models.
- See: https://github.com/pytest-dev/pluggy/pull/536
- """
- def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None:
- # IMPORTANT: @property methods can have side effects, and are never hookimpl
- # if attr is a property, skip it in advance
- plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
- if isinstance(getattr(plugin_class, name, None), property):
- return None
-
- # pydantic model fields are like attrs and also can never be hookimpls
- plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__")
- if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}):
- # pydantic models mess with the class and attr __signature__
- # so inspect.isroutine(...) throws exceptions and cant be used
- return None
-
- try:
- return super().parse_hookimpl_opts(plugin, name)
- except AttributeError:
- return super().parse_hookimpl_opts(type(plugin), name)
-
-pm = PluginManager("abx")
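
The reason for the `parse_hookimpl_opts()` override above: stock pluggy `getattr()`s every attribute of a registered plugin while scanning for hookimpls, which evaluates `@property` methods (triggering their side effects) and trips over pydantic model fields. A sketch of the case the patch handles, reusing the `abx` marker name from the deleted hookspec.py:

```python
import pluggy
from pydantic import BaseModel

hookimpl = pluggy.HookimplMarker('abx')

class MyPlugin(BaseModel):
    some_field: int = 1                  # pydantic field: must never be scanned as a hookimpl

    @property
    def expensive(self) -> int:          # @property: must never be evaluated during registration
        raise RuntimeError('side effect!')

    @hookimpl
    def get_CONFIG(self):
        return {}

pm = PluginManager('abx')                # the patched manager class defined above
pm.register(MyPlugin())                  # works: fields and properties are skipped up front
```
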
diff --git a/archivebox/abx/pydantic_pkgr/__init__.py b/archivebox/abx/pydantic_pkgr/__init__.py
deleted file mode 100644
index 28cd0f81..00000000
--- a/archivebox/abx/pydantic_pkgr/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__package__ = 'abx.pydantic_pkgr'
diff --git a/archivebox/abx/pydantic_pkgr/hookspec.py b/archivebox/abx/pydantic_pkgr/hookspec.py
deleted file mode 100644
index 6b293abb..00000000
--- a/archivebox/abx/pydantic_pkgr/hookspec.py
+++ /dev/null
@@ -1,13 +0,0 @@
-
-from ..hookspec import hookspec
-
-###########################################################################################
-
-@hookspec
-def get_BINPROVIDERS():
- return {}
-
-@hookspec
-def get_BINARIES():
- return {}
-
diff --git a/archivebox/plugins_auth/__init__.py b/archivebox/actors/__init__.py
similarity index 100%
rename from archivebox/plugins_auth/__init__.py
rename to archivebox/actors/__init__.py
diff --git a/archivebox/actors/actor.py b/archivebox/actors/actor.py
new file mode 100644
index 00000000..62369793
--- /dev/null
+++ b/archivebox/actors/actor.py
@@ -0,0 +1,313 @@
+__package__ = 'archivebox.actors'
+
+import os
+import time
+from abc import ABC, abstractmethod
+from typing import ClassVar, Generic, TypeVar, Any, cast, Literal, Type
+from django.utils.functional import classproperty
+
+from rich import print
+import psutil
+
+from django import db
+from django.db import models
+from django.db.models import QuerySet
+from multiprocessing import Process, cpu_count
+from threading import Thread, get_native_id
+
+# from archivebox.logging_util import TimedProgress
+
+LaunchKwargs = dict[str, Any]
+
+ModelType = TypeVar('ModelType', bound=models.Model)
+
+class ActorType(ABC, Generic[ModelType]):
+ """
+ Base class for all actors. Usage:
+ class FaviconActor(ActorType[ArchiveResult]):
+ QUERYSET: ClassVar[QuerySet] = ArchiveResult.objects.filter(status='queued', extractor='favicon')
+ CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
+ CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
+ ATOMIC: ClassVar[bool] = True
+
+ def claim_sql_set(self, obj: ArchiveResult) -> str:
+ # SQL fields to update atomically while claiming an object from the queue
+ retry_at = datetime.now() + timedelta(seconds=self.MAX_TICK_TIME)
+ return f"status = 'started', locked_by = {self.pid}, retry_at = {retry_at}"
+
+ def tick(self, obj: ArchiveResult) -> None:
+ run_favicon_extractor(obj)
+ ArchiveResult.objects.filter(pk=obj.pk, status='started').update(status='success')
+ """
+ pid: int
+ idle_count: int = 0
+ launch_kwargs: LaunchKwargs = {}
+ mode: Literal['thread', 'process'] = 'process'
+
+ MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8, up to 60% of available cpu cores
+ MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object
+
+ QUERYSET: ClassVar[QuerySet] # the QuerySet to claim objects from
+ CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue
+ CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue
+ CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue
+ CLAIM_FROM_TOP: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10 # the number of objects to consider when atomically getting the next object from the queue
+    ATOMIC: ClassVar[bool] = True                          # whether to atomically fetch+claim the next object in one step, or fetch and lock it in two steps
+
+ # model_type: Type[ModelType]
+
+ _SPAWNED_ACTOR_PIDS: ClassVar[list[psutil.Process]] = [] # record all the pids of Actors spawned by this class
+
+ def __init__(self, mode: Literal['thread', 'process']|None=None, **launch_kwargs: LaunchKwargs):
+ self.mode = mode or self.mode
+ self.launch_kwargs = launch_kwargs or dict(self.launch_kwargs)
+
+ @classproperty
+ def name(cls) -> str:
+ return cls.__name__ # type: ignore
+
+ def __str__(self) -> str:
+ return self.__repr__()
+
+ def __repr__(self) -> str:
+ """FaviconActor[pid=1234]"""
+ label = 'pid' if self.mode == 'process' else 'tid'
+ return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]'
+
+ ### Class Methods: Called by Orchestrator on ActorType class before it has been spawned
+
+ @classmethod
+ def get_running_actors(cls) -> list[int]:
+ """returns a list of pids of all running actors of this type"""
+ # WARNING: only works for process actors, not thread actors
+ if cls.mode == 'thread':
+ raise NotImplementedError('get_running_actors() is not implemented for thread actors')
+ return [
+ proc.pid for proc in cls._SPAWNED_ACTOR_PIDS
+ if proc.is_running() and proc.status() != 'zombie'
+ ]
+
+ @classmethod
+ def get_actors_to_spawn(cls, queue: QuerySet, running_actors: list[int]) -> list[LaunchKwargs]:
+ """Get a list of launch kwargs for the number of actors to spawn based on the queue and currently running actors"""
+ queue_length = queue.count()
+ if not queue_length: # queue is empty, spawn 0 actors
+ return []
+
+ actors_to_spawn: list[LaunchKwargs] = []
+ max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors)
+
+        # spawning new actors is expensive, so avoid spawning them all at once; stagger them
+        # and let the next orchestrator tick() handle starting another couple
+ # if queue_length > 10: # queue is long, spawn as many as possible
+ # actors_to_spawn += max_spawnable * [{}]
+
+ if queue_length > 4: # queue is medium, spawn 1 or 2 actors
+ actors_to_spawn += min(2, max_spawnable) * [{**cls.launch_kwargs}]
+ else: # queue is short, spawn 1 actor
+ actors_to_spawn += min(1, max_spawnable) * [{**cls.launch_kwargs}]
+ return actors_to_spawn
+
+ @classmethod
+ def start(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int:
+ if mode == 'thread':
+ return cls.fork_actor_as_thread(**launch_kwargs)
+ elif mode == 'process':
+ return cls.fork_actor_as_process(**launch_kwargs)
+ raise ValueError(f'Invalid actor mode: {mode} must be "thread" or "process"')
+
+ @classmethod
+ def fork_actor_as_thread(cls, **launch_kwargs: LaunchKwargs) -> int:
+ """Spawn a new background thread running the actor's runloop"""
+ actor = cls(mode='thread', **launch_kwargs)
+ bg_actor_thread = Thread(target=actor.runloop)
+ bg_actor_thread.start()
+ assert bg_actor_thread.native_id is not None
+ return bg_actor_thread.native_id
+
+ @classmethod
+ def fork_actor_as_process(cls, **launch_kwargs: LaunchKwargs) -> int:
+ """Spawn a new background process running the actor's runloop"""
+ actor = cls(mode='process', **launch_kwargs)
+ bg_actor_process = Process(target=actor.runloop)
+ bg_actor_process.start()
+ assert bg_actor_process.pid is not None
+ cls._SPAWNED_ACTOR_PIDS.append(psutil.Process(pid=bg_actor_process.pid))
+ return bg_actor_process.pid
+
+ @classmethod
+ def get_model(cls) -> Type[ModelType]:
+ # wish this was a @classproperty but Generic[ModelType] return type cant be statically inferred for @classproperty
+ return cls.QUERYSET.model
+
+ @classmethod
+ def get_queue(cls) -> QuerySet:
+ """override this to provide your queryset as the queue"""
+ # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot'))
+ return cls.QUERYSET
+
+ ### Instance Methods: Called by Actor after it has been spawned (i.e. forked as a thread or process)
+
+ def runloop(self):
+ """The main runloop that starts running when the actor is spawned (as subprocess or thread) and exits when the queue is empty"""
+ self.on_startup()
+ try:
+ while True:
+ obj_to_process: ModelType | None = None
+ try:
+                    obj_to_process = cast(ModelType, self.get_next(atomic=self.ATOMIC))
+ except Exception:
+ pass
+
+ if obj_to_process:
+ self.idle_count = 0 # reset idle count if we got an object
+ else:
+ if self.idle_count >= 30:
+ break # stop looping and exit if queue is empty and we have idled for 30sec
+ else:
+ # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...')
+ self.idle_count += 1
+ time.sleep(1)
+ continue
+
+ self.on_tick_start(obj_to_process)
+
+ # Process the object
+ try:
+ self.tick(obj_to_process)
+ except Exception as err:
+ print(f'[red]🏃♂️ ERROR: {self}.tick()[/red]', err)
+ db.connections.close_all() # always reset the db connection after an exception to clear any pending transactions
+ self.on_tick_exception(obj_to_process, err)
+ finally:
+ self.on_tick_end(obj_to_process)
+
+ self.on_shutdown(err=None)
+ except BaseException as err:
+ if isinstance(err, KeyboardInterrupt):
+ print()
+ else:
+ print(f'\n[red]🏃♂️ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err)
+ self.on_shutdown(err=err)
+
+ def get_next(self, atomic: bool | None=None) -> ModelType | None:
+ """get the next object from the queue, atomically locking it if self.atomic=True"""
+ if atomic is None:
+ atomic = self.ATOMIC
+
+ if atomic:
+            # fetch and claim the next object from the queue in one go, atomically
+ obj = self.get_next_atomic()
+ else:
+ # two-step claim: fetch the next object and lock it in a separate query
+ obj = self.get_queue().last()
+            assert obj and self.lock_next(obj), f'Unable to fetch+lock the next {self.get_model().__name__} object from {self}.QUERYSET'
+ return obj
+
+ def lock_next(self, obj: ModelType) -> bool:
+ """override this to implement a custom two-step (non-atomic)lock mechanism"""
+ # For example:
+ # assert obj._model.objects.filter(pk=obj.pk, status='queued').update(status='started', locked_by=self.pid)
+ # Not needed if using get_next_and_lock() to claim the object atomically
+ # print(f'[blue]🏃♂️ {self}.lock()[/blue]', obj.abid or obj.id)
+ return True
+
+ def claim_sql_where(self) -> str:
+ """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """
+ return self.CLAIM_WHERE
+
+ def claim_sql_set(self) -> str:
+ """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """
+ return self.CLAIM_SET
+
+ def claim_sql_order(self) -> str:
+ """override this to implement a custom ORDER BY clause for the atomic claim step e.g. "created_at DESC" """
+ return self.CLAIM_ORDER
+
+ def claim_from_top(self) -> int:
+ """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue"""
+ return self.CLAIM_FROM_TOP
+
+ def get_next_atomic(self, shallow: bool=True) -> ModelType | None:
+ """
+ claim a random object from the top n=50 objects in the queue (atomically updates status=queued->started for claimed object)
+ optimized for minimizing contention on the queue with other actors selecting from the same list
+ slightly faster than claim_any_obj() which selects randomly from the entire queue but needs to know the total count
+ """
+ Model = self.get_model() # e.g. ArchiveResult
+ table = f'{Model._meta.app_label}_{Model._meta.model_name}' # e.g. core_archiveresult
+
+ where_sql = self.claim_sql_where()
+ set_sql = self.claim_sql_set()
+ order_by_sql = self.claim_sql_order()
+ choose_from_top = self.claim_from_top()
+
+ with db.connection.cursor() as cursor:
+            # subquery gets the pool of the top N candidates sorted by the claim ORDER BY clause
+ # main query selects a random one from that pool
+ cursor.execute(f"""
+ UPDATE {table}
+ SET {set_sql}
+ WHERE {where_sql} and id = (
+ SELECT id FROM (
+ SELECT id FROM {table}
+ WHERE {where_sql}
+ ORDER BY {order_by_sql}
+ LIMIT {choose_from_top}
+ ) candidates
+ ORDER BY RANDOM()
+ LIMIT 1
+ )
+ RETURNING id;
+ """)
+ result = cursor.fetchone()
+
+ if result is None:
+ return None # If no rows were claimed, return None
+
+ if shallow:
+ # shallow: faster, returns potentially incomplete object instance missing some django auto-populated fields:
+                columns = [col[0] for col in cursor.description] if cursor.description else ['id']
+ return Model(**dict(zip(columns, result)))
+
+ # if not shallow do one extra query to get a more complete object instance (load it fully from scratch)
+ return Model.objects.get(id=result[0])
+
+ @abstractmethod
+ def tick(self, obj: ModelType) -> None:
+ """override this to process the object"""
+ print(f'[blue]🏃♂️ {self}.tick()[/blue]', obj.abid or obj.id)
+ # For example:
+ # do_some_task(obj)
+ # do_something_else(obj)
+ # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success')
+ raise NotImplementedError('tick() must be implemented by the Actor subclass')
+
+ def on_startup(self) -> None:
+ if self.mode == 'thread':
+ self.pid = get_native_id() # thread id
+ print(f'[green]🏃♂️ {self}.on_startup() STARTUP (THREAD)[/green]')
+ else:
+ self.pid = os.getpid() # process id
+ print(f'[green]🏃♂️ {self}.on_startup() STARTUP (PROCESS)[/green]')
+ # abx.pm.hook.on_actor_startup(self)
+
+ def on_shutdown(self, err: BaseException | None=None) -> None:
+ print(f'[grey53]🏃♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+ # abx.pm.hook.on_actor_shutdown(self)
+
+ def on_tick_start(self, obj: ModelType) -> None:
+ # print(f'🏃♂️ {self}.on_tick_start()', obj.abid or obj.id)
+ # abx.pm.hook.on_actor_tick_start(self, obj_to_process)
+ # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ')
+ pass
+
+ def on_tick_end(self, obj: ModelType) -> None:
+ # print(f'🏃♂️ {self}.on_tick_end()', obj.abid or obj.id)
+ # abx.pm.hook.on_actor_tick_end(self, obj_to_process)
+ # self.timer.end()
+ pass
+
+ def on_tick_exception(self, obj: ModelType, err: BaseException) -> None:
+ print(f'[red]🏃♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err)
+ # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err)
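
The key piece of actor.py is `get_next_atomic()`: a single `UPDATE ... RETURNING` claims one random row out of the top-N candidates, so concurrent actors rarely contend on the same rows and can never double-claim one. The same pattern, demonstrated standalone against sqlite3 (requires SQLite 3.35+ for `RETURNING`):

```python
import sqlite3

db = sqlite3.connect(':memory:')
db.execute('CREATE TABLE queue (id INTEGER PRIMARY KEY, status TEXT)')
db.executemany('INSERT INTO queue (status) VALUES (?)', [('queued',)] * 10)

claimed = db.execute("""
    UPDATE queue SET status = 'started'
    WHERE status = 'queued' AND id = (
        SELECT id FROM (
            SELECT id FROM queue WHERE status = 'queued' ORDER BY id LIMIT 5
        ) candidates
        ORDER BY RANDOM()
        LIMIT 1
    )
    RETURNING id;
""").fetchone()
print('claimed row id:', claimed[0] if claimed else None)
```
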
diff --git a/archivebox/actors/admin.py b/archivebox/actors/admin.py
new file mode 100644
index 00000000..8c38f3f3
--- /dev/null
+++ b/archivebox/actors/admin.py
@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.
diff --git a/archivebox/actors/apps.py b/archivebox/actors/apps.py
new file mode 100644
index 00000000..2347ac3f
--- /dev/null
+++ b/archivebox/actors/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class ActorsConfig(AppConfig):
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "actors"
diff --git a/archivebox/plugins_extractor/__init__.py b/archivebox/actors/migrations/__init__.py
similarity index 100%
rename from archivebox/plugins_extractor/__init__.py
rename to archivebox/actors/migrations/__init__.py
diff --git a/archivebox/actors/models.py b/archivebox/actors/models.py
new file mode 100644
index 00000000..71a83623
--- /dev/null
+++ b/archivebox/actors/models.py
@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.
diff --git a/archivebox/actors/orchestrator.py b/archivebox/actors/orchestrator.py
new file mode 100644
index 00000000..df4c860b
--- /dev/null
+++ b/archivebox/actors/orchestrator.py
@@ -0,0 +1,244 @@
+__package__ = 'archivebox.actors'
+
+import os
+import time
+import itertools
+from typing import Dict, Type, Literal, ClassVar
+from django.utils.functional import classproperty
+
+from multiprocessing import Process, cpu_count
+from threading import Thread, get_native_id
+
+
+from rich import print
+
+from django.db.models import QuerySet
+
+from django.apps import apps
+from .actor import ActorType
+
+class Orchestrator:
+ pid: int
+ idle_count: int = 0
+ actor_types: Dict[str, Type[ActorType]]
+ mode: Literal['thread', 'process'] = 'process'
+
+ def __init__(self, actor_types: Dict[str, Type[ActorType]] | None = None, mode: Literal['thread', 'process'] | None=None):
+        self.actor_types = actor_types or getattr(self, 'actor_types', None) or self.autodiscover_actor_types()
+ self.mode = mode or self.mode
+
+ def __repr__(self) -> str:
+ label = 'tid' if self.mode == 'thread' else 'pid'
+ return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]'
+
+ def __str__(self) -> str:
+ return self.__repr__()
+
+ @classproperty
+ def name(cls) -> str:
+ return cls.__name__ # type: ignore
+
+ def fork_as_thread(self):
+ self.thread = Thread(target=self.runloop)
+ self.thread.start()
+ assert self.thread.native_id is not None
+ return self.thread.native_id
+
+ def fork_as_process(self):
+ self.process = Process(target=self.runloop)
+ self.process.start()
+ assert self.process.pid is not None
+ return self.process.pid
+
+ def start(self) -> int:
+ if self.mode == 'thread':
+ return self.fork_as_thread()
+ elif self.mode == 'process':
+ return self.fork_as_process()
+ raise ValueError(f'Invalid orchestrator mode: {self.mode}')
+
+ @classmethod
+ def autodiscover_actor_types(cls) -> Dict[str, Type[ActorType]]:
+ # returns a Dict of all discovered {actor_type_id: ActorType} across the codebase
+ # override this method in a subclass to customize the actor types that are used
+ # return {'Snapshot': SnapshotActorType, 'ArchiveResult_chrome': ChromeActorType, ...}
+ return {
+ # look through all models and find all classes that inherit from ActorType
+ # actor_type.__name__: actor_type
+ # for actor_type in abx.pm.hook.get_all_ACTORS_TYPES().values()
+ }
+
+ @classmethod
+ def get_orphaned_objects(cls, all_queues) -> list:
+        # returns any objects that are past their retry_at time but not claimed in any actor type's queue
+        all_queued_ids = set(itertools.chain(*[queue.values_list('id', flat=True) for queue in all_queues.values()]))
+ orphaned_objects = []
+ for model in apps.get_models():
+ if hasattr(model, 'retry_at'):
+ orphaned_objects.extend(model.objects.filter(retry_at__lt=timezone.now()).exclude(id__in=all_queued_ids))
+ return orphaned_objects
+
+ def on_startup(self):
+ if self.mode == 'thread':
+ self.pid = get_native_id()
+ print(f'[green]👨✈️ {self}.on_startup() STARTUP (THREAD)[/green]')
+ elif self.mode == 'process':
+ self.pid = os.getpid()
+ print(f'[green]👨✈️ {self}.on_startup() STARTUP (PROCESS)[/green]')
+ # abx.pm.hook.on_orchestrator_startup(self)
+
+ def on_shutdown(self, err: BaseException | None = None):
+ print(f'[grey53]👨✈️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+ # abx.pm.hook.on_orchestrator_shutdown(self)
+
+ def on_tick_started(self, all_queues):
+ # total_pending = sum(queue.count() for queue in all_queues.values())
+ # print(f'👨✈️ {self}.on_tick_started()', f'total_pending={total_pending}')
+ # abx.pm.hook.on_orchestrator_tick_started(self, actor_types, all_queues)
+ pass
+
+ def on_tick_finished(self, all_queues, all_existing_actors, all_spawned_actors):
+ if all_spawned_actors:
+ total_queue_length = sum(queue.count() for queue in all_queues.values())
+ print(f'[grey53]👨✈️ {self}.on_tick_finished() queue={total_queue_length} existing_actors={len(all_existing_actors)} spawned_actors={len(all_spawned_actors)}[/grey53]')
+ # abx.pm.hook.on_orchestrator_tick_finished(self, actor_types, all_queues)
+
+ def on_idle(self, all_queues):
+ # print(f'👨✈️ {self}.on_idle()')
+ # abx.pm.hook.on_orchestrator_idle(self)
+ # check for orphaned objects left behind
+ if self.idle_count == 60:
+ orphaned_objects = self.get_orphaned_objects(all_queues)
+ if orphaned_objects:
+ print('[red]👨✈️ WARNING: some objects may not be processed, no actor has claimed them after 60s:[/red]', orphaned_objects)
+
+ def runloop(self):
+ self.on_startup()
+ try:
+ while True:
+ all_queues = {
+ actor_type: actor_type.get_queue()
+ for actor_type in self.actor_types.values()
+ }
+ if not all_queues:
+ raise Exception('Failed to find any actor_types to process')
+
+ self.on_tick_started(all_queues)
+
+ all_existing_actors = []
+ all_spawned_actors = []
+
+ for actor_type, queue in all_queues.items():
+ try:
+ existing_actors = actor_type.get_running_actors()
+ all_existing_actors.extend(existing_actors)
+ actors_to_spawn = actor_type.get_actors_to_spawn(queue, existing_actors)
+ for launch_kwargs in actors_to_spawn:
+ new_actor_pid = actor_type.start(mode='process', **launch_kwargs)
+ all_spawned_actors.append(new_actor_pid)
+ except Exception as err:
+ print(f'🏃♂️ ERROR: {self} Failed to get {actor_type} queue & running actors', err)
+ except BaseException:
+ raise
+
+ if not any(queue.exists() for queue in all_queues.values()):
+ self.on_idle(all_queues)
+ self.idle_count += 1
+ time.sleep(1)
+ else:
+ self.idle_count = 0
+
+ self.on_tick_finished(all_queues, all_existing_actors, all_spawned_actors)
+ time.sleep(1)
+
+ except BaseException as err:
+ if isinstance(err, KeyboardInterrupt):
+ print()
+ else:
+ print(f'\n[red]🏃♂️ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err)
+ self.on_shutdown(err=err)
+
+
+
+from archivebox.config.django import setup_django
+
+setup_django()
+
+from core.models import ArchiveResult, Snapshot
+
+from django.utils import timezone
+
+from django import db
+from django.db import connection
+
+
+from crawls.actors import CrawlActor
+from .actor_snapshot import SnapshotActor
+
+from abx_plugin_singlefile.actors import SinglefileActor
+
+
+class FaviconActor(ActorType[ArchiveResult]):
+ CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
+    CLAIM_WHERE: ClassVar[str] = 'status = "failed" AND extractor = "favicon"'   # match the QUERYSET below
+ CLAIM_SET: ClassVar[str] = 'status = "started"'
+
+ @classproperty
+ def QUERYSET(cls) -> QuerySet:
+ return ArchiveResult.objects.filter(status='failed', extractor='favicon')
+
+ def tick(self, obj: ArchiveResult):
+ print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
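+        # optimistic concurrency: the UPDATE below only matches while status is
+        # still 'started', so two actors can never both mark the same object done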
+        updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='succeeded') == 1
+ if not updated:
+ raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
+ obj.refresh_from_db()
+ obj.save()
+
+
+class ExtractorsOrchestrator(Orchestrator):
+ actor_types = {
+ 'CrawlActor': CrawlActor,
+ 'SnapshotActor': SnapshotActor,
+ 'FaviconActor': FaviconActor,
+ 'SinglefileActor': SinglefileActor,
+ }
+
+
+if __name__ == '__main__':
+ orchestrator = ExtractorsOrchestrator()
+ orchestrator.start()
+
+ snap = Snapshot.objects.last()
+ assert snap is not None
+ created = 0
+ while True:
+ time.sleep(0.05)
+ # try:
+ # ArchiveResult.objects.bulk_create([
+ # ArchiveResult(
+ # id=uuid.uuid4(),
+ # snapshot=snap,
+ # status='failed',
+ # extractor='favicon',
+ # cmd=['echo', '"hello"'],
+ # cmd_version='1.0',
+ # pwd='.',
+ # start_ts=timezone.now(),
+ # end_ts=timezone.now(),
+ # created_at=timezone.now(),
+ # modified_at=timezone.now(),
+ # created_by_id=1,
+ # )
+ # for _ in range(100)
+ # ])
+ # created += 100
+ # if created % 1000 == 0:
+ # print(f'[blue]Created {created} ArchiveResults...[/blue]')
+ # time.sleep(25)
+ # except Exception as err:
+ # print(err)
+ # db.connections.close_all()
+ # except BaseException as err:
+ # print(err)
+ # break
diff --git a/archivebox/actors/statemachine.py b/archivebox/actors/statemachine.py
new file mode 100644
index 00000000..53883120
--- /dev/null
+++ b/archivebox/actors/statemachine.py
@@ -0,0 +1,286 @@
+from statemachine import State, StateMachine
+
+from django.db import models, transaction
+from django.db.models import Q
+from django.utils import timezone
+
+from datetime import timedelta
+from multiprocessing import Process
+from huey.contrib import djhuey
+import psutil
+import time
+import os
+import abx
+
+# NOTE: this module is a design sketch: helpers like cleanup_snapshot_dir(),
+# execute_extractor(), get_archiveresult_output(), update_*_index_*(), etc.
+# are assumed to be implemented elsewhere.
+
+# State Machine Definitions
+#################################################
+
+class SnapshotMachine(StateMachine):
+ """State machine for managing Snapshot lifecycle."""
+
+ # States
+ queued = State(initial=True)
+ started = State()
+ sealed = State(final=True)
+
+ # Transitions
+ start = queued.to(started, cond='can_start')
+ seal = started.to(sealed, cond='is_finished')
+
+ # Events
+ tick = (
+ queued.to.itself(unless='can_start') |
+ queued.to(started, cond='can_start') |
+ started.to.itself(unless='is_finished') |
+ started.to(sealed, cond='is_finished')
+ )
+
+ def __init__(self, snapshot):
+ self.snapshot = snapshot
+ super().__init__()
+
+ def can_start(self):
+ return True
+
+ def is_finished(self):
+ return not self.snapshot.has_pending_archiveresults()
+
+ def before_start(self):
+ """Pre-start validation and setup."""
+ self.snapshot.cleanup_dir()
+
+ def after_start(self):
+ """Post-start side effects."""
+ self.snapshot.create_pending_archiveresults()
+ self.snapshot.update_indices()
+ self.snapshot.bump_retry_at(seconds=10)
+
+ def before_seal(self):
+ """Pre-seal validation and cleanup."""
+ self.snapshot.cleanup_dir()
+
+ def after_seal(self):
+ """Post-seal actions."""
+ self.snapshot.update_indices()
+ self.snapshot.seal_dir()
+ self.snapshot.upload_dir()
+ self.snapshot.retry_at = None
+ self.snapshot.save()
+
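+# Example usage (a sketch; assumes a Snapshot instance `snapshot` exists):
+#   machine = SnapshotMachine(snapshot)
+#   machine.tick()   # fires whichever transition's condition currently passes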
+
+class ArchiveResultMachine(StateMachine):
+ """State machine for managing ArchiveResult lifecycle."""
+
+ # States
+ queued = State(initial=True)
+ started = State()
+ succeeded = State(final=True)
+ backoff = State()
+ failed = State(final=True)
+
+    # Transitions
+    start = queued.to(started, cond='can_start')
+    succeed = started.to(succeeded, cond='extractor_succeeded')
+    # named enter_backoff (not backoff) so it doesn't shadow the backoff State above
+    enter_backoff = started.to(backoff, unless='extractor_succeeded')
+    retry = backoff.to(queued, cond='can_retry')
+    fail = backoff.to(failed, unless='can_retry')
+
+ # Events
+ tick = (
+ queued.to.itself(unless='can_start') |
+ queued.to(started, cond='can_start') |
+ started.to.itself(cond='extractor_still_running') |
+ started.to(succeeded, cond='extractor_succeeded') |
+ started.to(backoff, unless='extractor_succeeded') |
+ backoff.to.itself(cond='still_waiting_to_retry') |
+ backoff.to(queued, cond='can_retry') |
+ backoff.to(failed, unless='can_retry')
+ )
+
+ def __init__(self, archiveresult):
+ self.archiveresult = archiveresult
+ super().__init__()
+
+ def can_start(self):
+ return True
+
+    def extractor_still_running(self):
+        return self.archiveresult.start_ts > timezone.now() - timedelta(seconds=5)
+
+    def extractor_succeeded(self):
+        # placeholder check; real logic would be e.g. check_if_extractor_succeeded(self.archiveresult)
+        return self.archiveresult.start_ts < timezone.now() - timedelta(seconds=5)
+
+    def can_retry(self):
+        return self.archiveresult.retries < self.archiveresult.max_retries
+
+    def still_waiting_to_retry(self):
+        # placeholder: stay in backoff until the scheduled retry_at time has passed
+        return bool(self.archiveresult.retry_at) and timezone.now() < self.archiveresult.retry_at
+
+ def before_start(self):
+ """Pre-start initialization."""
+ self.archiveresult.retries += 1
+        self.archiveresult.start_ts = timezone.now()
+ self.archiveresult.output = None
+ self.archiveresult.error = None
+
+ def after_start(self):
+ """Post-start execution."""
+ self.archiveresult.bump_retry_at(seconds=self.archiveresult.timeout + 5)
+ execute_extractor(self.archiveresult)
+ self.archiveresult.snapshot.bump_retry_at(seconds=5)
+
+ def before_succeed(self):
+ """Pre-success validation."""
+ self.archiveresult.output = get_archiveresult_output(self.archiveresult)
+
+ def after_succeed(self):
+ """Post-success cleanup."""
+        self.archiveresult.end_ts = timezone.now()
+ self.archiveresult.retry_at = None
+ self.archiveresult.update_indices()
+
+    def before_enter_backoff(self):
+ """Pre-backoff error capture."""
+ self.archiveresult.error = get_archiveresult_error(self.archiveresult)
+
+    def after_enter_backoff(self):
+ """Post-backoff retry scheduling."""
+        self.archiveresult.end_ts = timezone.now()
+ self.archiveresult.bump_retry_at(
+ seconds=self.archiveresult.timeout * self.archiveresult.retries
+ )
+ self.archiveresult.update_indices()
+
+ def before_fail(self):
+ """Pre-failure finalization."""
+ self.archiveresult.retry_at = None
+
+ def after_fail(self):
+ """Post-failure cleanup."""
+ self.archiveresult.update_indices()
+
+# Models
+#################################################
+
+class Snapshot(models.Model):
+ status = models.CharField(max_length=32, default='queued')
+ retry_at = models.DateTimeField(null=True)
+
+    @property
+    def sm(self):
+        """Get the state machine for this snapshot."""
+        return SnapshotMachine(self)
+
+    def get_machine(self):
+        return SnapshotMachine(self)
+
+ def has_pending_archiveresults(self):
+ return self.archiveresult_set.exclude(
+ status__in=['succeeded', 'failed']
+ ).exists()
+
+ def bump_retry_at(self, seconds):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+ self.save()
+
+ def cleanup_dir(self):
+ cleanup_snapshot_dir(self)
+
+ def create_pending_archiveresults(self):
+ create_snapshot_pending_archiveresults(self)
+
+ def update_indices(self):
+ update_snapshot_index_json(self)
+ update_snapshot_index_html(self)
+
+ def seal_dir(self):
+ seal_snapshot_dir(self)
+
+ def upload_dir(self):
+ upload_snapshot_dir(self)
+
+
+class ArchiveResult(models.Model):
+ snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
+ status = models.CharField(max_length=32, default='queued')
+ retry_at = models.DateTimeField(null=True)
+ retries = models.IntegerField(default=0)
+ max_retries = models.IntegerField(default=3)
+ timeout = models.IntegerField(default=60)
+ start_ts = models.DateTimeField(null=True)
+ end_ts = models.DateTimeField(null=True)
+ output = models.TextField(null=True)
+ error = models.TextField(null=True)
+
+ def get_machine(self):
+ return ArchiveResultMachine(self)
+
+ def bump_retry_at(self, seconds):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+ self.save()
+
+ def update_indices(self):
+ update_archiveresult_index_json(self)
+ update_archiveresult_index_html(self)
+
+
+# Actor System
+#################################################
+
+class BaseActor:
+ MAX_TICK_TIME = 60
+
+    def tick(self, obj):
+        """Advance a single object through its state machine."""
+        machine = obj.get_machine()
+        # both machines define a `tick` event that encodes every legal
+        # transition plus its condition, so firing it moves the object to
+        # whichever state it currently qualifies for (or leaves it in place)
+        machine.tick()
+
+
+class Orchestrator:
+ """Main orchestrator that manages all actors."""
+
+ def __init__(self):
+ self.pid = None
+
+ @classmethod
+ def spawn(cls):
+ orchestrator = cls()
+ proc = Process(target=orchestrator.runloop)
+ proc.start()
+ return proc.pid
+
+ def runloop(self):
+ self.pid = os.getpid()
+ abx.pm.hook.on_orchestrator_startup(self)
+
+ try:
+ while True:
+ self.process_queue(Snapshot)
+ self.process_queue(ArchiveResult)
+ time.sleep(0.1)
+
+ except (KeyboardInterrupt, SystemExit):
+ abx.pm.hook.on_orchestrator_shutdown(self)
+
+ def process_queue(self, model):
+        retry_at_reached = Q(retry_at__isnull=True) | Q(retry_at__lte=timezone.now())
+ queue = model.objects.filter(retry_at_reached)
+
+ if queue.exists():
+ actor = BaseActor()
+ for obj in queue:
+ try:
+ with transaction.atomic():
+ actor.tick(obj)
+ except Exception as e:
+ abx.pm.hook.on_actor_tick_exception(actor, obj, e)
+
+
+# Periodic Tasks
+#################################################
+
+@djhuey.periodic_task(djhuey.crontab(minute='*'))
+def ensure_orchestrator_running():
+    """Ensure orchestrator is running, start if not."""
+    # NOTE: assumes the orchestrator process sets its OS-level process name
+    # (e.g. via setproctitle); a plain multiprocessing.Process won't show up
+    # under this name in psutil
+    if not any(p.name().startswith('Orchestrator') for p in psutil.process_iter()):
+        Orchestrator.spawn()
diff --git a/archivebox/actors/tests.py b/archivebox/actors/tests.py
new file mode 100644
index 00000000..7ce503c2
--- /dev/null
+++ b/archivebox/actors/tests.py
@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.
diff --git a/archivebox/actors/views.py b/archivebox/actors/views.py
new file mode 100644
index 00000000..91ea44a2
--- /dev/null
+++ b/archivebox/actors/views.py
@@ -0,0 +1,3 @@
+from django.shortcuts import render
+
+# Create your views here.
diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py
index 1a3f8a7b..8513d682 100644
--- a/archivebox/config/__init__.py
+++ b/archivebox/config/__init__.py
@@ -1,4 +1,5 @@
-__package__ = 'archivebox.config'
+__package__ = 'config'
+__order__ = 200
from .paths import (
PACKAGE_DIR, # noqa
@@ -8,35 +9,28 @@ from .paths import (
from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
from .version import VERSION # noqa
-
-import abx
-
+# import abx
# @abx.hookimpl
-# def get_INSTALLED_APPS():
-# return ['config']
+# def get_CONFIG():
+# from .common import (
+# SHELL_CONFIG,
+# STORAGE_CONFIG,
+# GENERAL_CONFIG,
+# SERVER_CONFIG,
+# ARCHIVING_CONFIG,
+# SEARCH_BACKEND_CONFIG,
+# )
+# return {
+# 'SHELL_CONFIG': SHELL_CONFIG,
+# 'STORAGE_CONFIG': STORAGE_CONFIG,
+# 'GENERAL_CONFIG': GENERAL_CONFIG,
+# 'SERVER_CONFIG': SERVER_CONFIG,
+# 'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
+# 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
+# }
-
-@abx.hookimpl
-def get_CONFIG():
- from .common import (
- SHELL_CONFIG,
- STORAGE_CONFIG,
- GENERAL_CONFIG,
- SERVER_CONFIG,
- ARCHIVING_CONFIG,
- SEARCH_BACKEND_CONFIG,
- )
- return {
- 'SHELL_CONFIG': SHELL_CONFIG,
- 'STORAGE_CONFIG': STORAGE_CONFIG,
- 'GENERAL_CONFIG': GENERAL_CONFIG,
- 'SERVER_CONFIG': SERVER_CONFIG,
- 'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
- 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
- }
-
-@abx.hookimpl
-def ready():
- for config in get_CONFIG().values():
- config.validate()
+# @abx.hookimpl
+# def ready():
+# for config in get_CONFIG().values():
+# config.validate()
diff --git a/archivebox/config/configfile.py b/archivebox/config/collection.py
similarity index 93%
rename from archivebox/config/configfile.py
rename to archivebox/config/collection.py
index c489e114..d0c5a273 100644
--- a/archivebox/config/configfile.py
+++ b/archivebox/config/collection.py
@@ -9,16 +9,18 @@ from configparser import ConfigParser
from benedict import benedict
+import archivebox
+
from archivebox.config.constants import CONSTANTS
from archivebox.misc.logging import stderr
def get_real_name(key: str) -> str:
- """get the current canonical name for a given deprecated config key"""
- from django.conf import settings
+ """get the up-to-date canonical name for a given old alias or current key"""
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
- for section in settings.CONFIGS.values():
+ for section in CONFIGS.values():
try:
return section.aliases[key]
except KeyError:
@@ -115,17 +117,15 @@ def load_config_file() -> Optional[benedict]:
def section_for_key(key: str) -> Any:
- from django.conf import settings
- for config_section in settings.CONFIGS.values():
+ for config_section in archivebox.pm.hook.get_CONFIGS().values():
if hasattr(config_section, key):
return config_section
- return None
+ raise ValueError(f'No config section found for key: {key}')
def write_config_file(config: Dict[str, str]) -> benedict:
"""load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
- import abx.archivebox.reads
from archivebox.misc.system import atomic_write
CONFIG_HEADER = (
@@ -175,7 +175,7 @@ def write_config_file(config: Dict[str, str]) -> benedict:
updated_config = {}
try:
# validate the updated_config by attempting to re-parse it
- updated_config = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()}
+ updated_config = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
except BaseException: # lgtm [py/catch-base-exception]
# something went horribly wrong, revert to the previous version
with open(f'{config_path}.bak', 'r', encoding='utf-8') as old:
@@ -233,11 +233,11 @@ def load_config(defaults: Dict[str, Any],
return benedict(extended_config)
def load_all_config():
- import abx.archivebox.reads
+ import abx
flat_config = benedict()
- for config_section in abx.archivebox.reads.get_CONFIGS().values():
+ for config_section in abx.pm.hook.get_CONFIGS().values():
config_section.__init__()
flat_config.update(config_section.model_dump())
diff --git a/archivebox/config/common.py b/archivebox/config/common.py
index 15f575f4..ee6c438b 100644
--- a/archivebox/config/common.py
+++ b/archivebox/config/common.py
@@ -10,7 +10,7 @@ from rich import print
from pydantic import Field, field_validator
from django.utils.crypto import get_random_string
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from .constants import CONSTANTS
from .version import get_COMMIT_HASH, get_BUILD_TIME, VERSION
@@ -45,8 +45,6 @@ class ShellConfig(BaseConfigSet):
def BUILD_TIME(self) -> str:
return get_BUILD_TIME()
- # def VERSIONS_AVAILABLE() -> bool # .check_for_update.get_versions_available_on_github(c)},
- # def CAN_UPGRADE() -> bool # .check_for_update.can_upgrade(c)},
SHELL_CONFIG = ShellConfig()
diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py
index b8019f99..5124384d 100644
--- a/archivebox/config/constants.py
+++ b/archivebox/config/constants.py
@@ -1,3 +1,15 @@
+"""
+Constants are for things that never change at runtime.
+(but they can change from run-to-run or machine-to-machine)
+
+DATA_DIR will never change at runtime, but you can run
+archivebox from inside a different DATA_DIR on the same machine.
+
+This is loaded very early in the archivebox startup flow, so nothing in this file
+(or imported by this file) should import anything from archivebox.config.common,
+django, other INSTALLED_APPS, or anything else outside the standard library.
+"""
+
__package__ = 'archivebox.config'
import re
@@ -197,10 +209,12 @@ class ConstantsDict(Mapping):
@classmethod
def __getitem__(cls, key: str):
+        # make item access behave like attr access: CONSTANTS['KEY'] == CONSTANTS.KEY
return getattr(cls, key)
@classmethod
def __benedict__(cls):
+ # when casting to benedict, only include uppercase keys that don't start with an underscore
return benedict({key: value for key, value in cls.__dict__.items() if key.isupper() and not key.startswith('_')})
@classmethod
@@ -214,5 +228,6 @@ class ConstantsDict(Mapping):
CONSTANTS = ConstantsDict()
CONSTANTS_CONFIG = CONSTANTS.__benedict__()
-# add all key: values to globals() for easier importing
-globals().update(CONSTANTS)
+# add all key: values to globals() for easier importing, e.g.:
+# from archivebox.config.constants import IS_ROOT, PERSONAS_DIR, ...
+# globals().update(CONSTANTS)
diff --git a/archivebox/config/django.py b/archivebox/config/django.py
index eb79ab43..073cd2d4 100644
--- a/archivebox/config/django.py
+++ b/archivebox/config/django.py
@@ -60,7 +60,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
return
with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS:
- INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25)
+ INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=False)
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission
@@ -97,7 +97,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None:
except Exception as e:
bump_startup_progress_bar(advance=1000)
- is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version', 'init'))
+ is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version'))
if not is_using_meta_cmd:
# show error message to user only if they're not running a meta command / just trying to get help
STDERR.print()
diff --git a/archivebox/config/version.py b/archivebox/config/version.py
index 26df4592..026bfa64 100644
--- a/archivebox/config/version.py
+++ b/archivebox/config/version.py
@@ -45,7 +45,7 @@ def detect_installed_version(PACKAGE_DIR: Path=PACKAGE_DIR):
@cache
def get_COMMIT_HASH() -> Optional[str]:
try:
- git_dir = PACKAGE_DIR / '../.git'
+ git_dir = PACKAGE_DIR.parent / '.git'
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
commit_hash = git_dir.joinpath(ref).read_text().strip()
return commit_hash
@@ -53,7 +53,7 @@ def get_COMMIT_HASH() -> Optional[str]:
pass
try:
- return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip()
+ return list((PACKAGE_DIR.parent / '.git/refs/heads/').glob('*'))[0].read_text().strip()
except Exception:
pass
@@ -62,8 +62,12 @@ def get_COMMIT_HASH() -> Optional[str]:
@cache
def get_BUILD_TIME() -> str:
if IN_DOCKER:
- docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
- return docker_build_end_time
+ try:
+ # if we're in the archivebox official docker image, /VERSION.txt will contain the build time
+ docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
+ return docker_build_end_time
+ except Exception:
+ pass
src_last_modified_unix_timestamp = (PACKAGE_DIR / 'README.md').stat().st_mtime
return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s')
diff --git a/archivebox/config/views.py b/archivebox/config/views.py
index db2c7eaa..975ef7ff 100644
--- a/archivebox/config/views.py
+++ b/archivebox/config/views.py
@@ -14,8 +14,8 @@ from django.utils.html import format_html, mark_safe
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
-import abx.archivebox.reads
-
+import abx
+import archivebox
from archivebox.config import CONSTANTS
from archivebox.misc.util import parse_date
@@ -65,7 +65,7 @@ def obj_to_yaml(obj: Any, indent: int=0) -> str:
@render_with_table_view
def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
-
+ FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = {
@@ -81,12 +81,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
relevant_configs = {
key: val
- for key, val in settings.FLAT_CONFIG.items()
+ for key, val in FLAT_CONFIG.items()
if '_BINARY' in key or '_VERSION' in key
}
- for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items():
- plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+ for plugin_id, plugin in abx.get_all_plugins().items():
if not plugin.hooks.get('get_BINARIES'):
continue
@@ -131,17 +130,16 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
- assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
+ assert request.user and request.user.is_superuser, 'Must be a superuser to view configuration settings.'
binary = None
plugin = None
- for plugin_id in abx.archivebox.reads.get_PLUGINS().keys():
- loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    for plugin_id, plugin_info in abx.get_all_plugins().items():
try:
- for loaded_binary in loaded_plugin.hooks.get_BINARIES().values():
+            for loaded_binary in plugin_info['hooks'].get_BINARIES().values():
if loaded_binary.name == key:
binary = loaded_binary
- plugin = loaded_plugin
+                    plugin = plugin_info
# break # last write wins
except Exception as e:
print(e)
@@ -161,7 +159,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"name": binary.name,
"description": binary.abspath,
"fields": {
- 'plugin': plugin.package,
+ 'plugin': plugin['package'],
'binprovider': binary.loaded_binprovider,
'abspath': binary.loaded_abspath,
'version': binary.loaded_version,
@@ -215,9 +213,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
return color
return 'black'
- for plugin_id in settings.PLUGINS.keys():
-
- plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+ for plugin_id, plugin in abx.get_all_plugins().items():
plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
@@ -263,7 +259,7 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert plugin_id, f'Could not find a plugin matching the specified name: {key}'
- plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+ plugin = abx.get_plugin(plugin_id)
return ItemContext(
slug=key,
diff --git a/archivebox/core/__init__.py b/archivebox/core/__init__.py
index ac3ec769..9a301977 100644
--- a/archivebox/core/__init__.py
+++ b/archivebox/core/__init__.py
@@ -1,2 +1,31 @@
__package__ = 'archivebox.core'
+import abx
+
+@abx.hookimpl
+def register_admin(admin_site):
+ """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
+ from core.admin import register_admin
+ register_admin(admin_site)
+
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from archivebox.config.common import (
+ SHELL_CONFIG,
+ STORAGE_CONFIG,
+ GENERAL_CONFIG,
+ SERVER_CONFIG,
+ ARCHIVING_CONFIG,
+ SEARCH_BACKEND_CONFIG,
+ )
+ return {
+ 'SHELL_CONFIG': SHELL_CONFIG,
+ 'STORAGE_CONFIG': STORAGE_CONFIG,
+ 'GENERAL_CONFIG': GENERAL_CONFIG,
+ 'SERVER_CONFIG': SERVER_CONFIG,
+ 'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
+ 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
+ }
+
diff --git a/archivebox/core/actors.py b/archivebox/core/actors.py
new file mode 100644
index 00000000..30b8245f
--- /dev/null
+++ b/archivebox/core/actors.py
@@ -0,0 +1,73 @@
+__package__ = 'archivebox.core'
+
+from typing import ClassVar
+
+from rich import print
+
+from django.db.models import QuerySet
+from django.utils import timezone
+from datetime import timedelta
+from core.models import Snapshot
+
+from actors.actor import ActorType
+
+
+class SnapshotActor(ActorType[Snapshot]):
+
+ QUERYSET: ClassVar[QuerySet] = Snapshot.objects.filter(status='queued')
+ CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue
+ CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue
+ CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue
+ CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue
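+
+    # taken together, the CLAIM_* settings above translate to roughly this SQL
+    # (a sketch; the actual query is assembled by ActorType at claim time):
+    #   UPDATE core_snapshot
+    #   SET status = 'started', retry_at = '{now + MAX_TICK_TIME}'
+    #   WHERE id IN (
+    #       SELECT id FROM core_snapshot
+    #       WHERE status = 'queued' ORDER BY created_at DESC LIMIT 50
+    #   ) RETURNING id;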
+
+ # model_type: Type[ModelType]
+ MAX_CONCURRENT_ACTORS: ClassVar[int] = 4 # min 2, max 8, up to 60% of available cpu cores
+ MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object
+
+ def claim_sql_where(self) -> str:
+ """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """
+ return self.CLAIM_WHERE
+
+ def claim_sql_set(self) -> str:
+ """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """
+ retry_at = timezone.now() + timedelta(seconds=self.MAX_TICK_TIME)
+ # format as 2024-10-31 10:14:33.240903
+ retry_at_str = retry_at.strftime('%Y-%m-%d %H:%M:%S.%f')
+ return f'{self.CLAIM_SET}, retry_at = {retry_at_str}'
+
+ def claim_sql_order(self) -> str:
+ """override this to implement a custom ORDER BY clause for the atomic claim step e.g. "created_at DESC" """
+ return self.CLAIM_ORDER
+
+ def claim_from_top(self) -> int:
+ """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue"""
+ return self.CLAIM_FROM_TOP
+
+ def tick(self, obj: Snapshot) -> None:
+ """override this to process the object"""
+ print(f'[blue]🏃♂️ {self}.tick()[/blue]', obj.abid or obj.id)
+ # For example:
+ # do_some_task(obj)
+ # do_something_else(obj)
+ # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success')
+ # raise NotImplementedError('tick() must be implemented by the Actor subclass')
+
+ def on_shutdown(self, err: BaseException | None=None) -> None:
+ print(f'[grey53]🏃♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+ # abx.pm.hook.on_actor_shutdown(self)
+
+ def on_tick_start(self, obj: Snapshot) -> None:
+ # print(f'🏃♂️ {self}.on_tick_start()', obj.abid or obj.id)
+ # abx.pm.hook.on_actor_tick_start(self, obj_to_process)
+ # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ')
+ pass
+
+ def on_tick_end(self, obj: Snapshot) -> None:
+ # print(f'🏃♂️ {self}.on_tick_end()', obj.abid or obj.id)
+ # abx.pm.hook.on_actor_tick_end(self, obj_to_process)
+ # self.timer.end()
+ pass
+
+ def on_tick_exception(self, obj: Snapshot, err: BaseException) -> None:
+ print(f'[red]🏃♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err)
+ # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err)
diff --git a/archivebox/core/admin_archiveresults.py b/archivebox/core/admin_archiveresults.py
index aff7b1df..675f5f43 100644
--- a/archivebox/core/admin_archiveresults.py
+++ b/archivebox/core/admin_archiveresults.py
@@ -8,7 +8,7 @@ from django.utils.html import format_html, mark_safe
from django.core.exceptions import ValidationError
from django.urls import reverse, resolve
from django.utils import timezone
-from django.forms import forms
+from django_jsonform.forms.fields import JSONFormField
from huey_monitor.admin import TaskModel
@@ -83,7 +83,7 @@ class ArchiveResultInline(admin.TabularInline):
formset.form.base_fields['cmd_version'].initial = '-'
formset.form.base_fields['pwd'].initial = str(snapshot.link_dir)
formset.form.base_fields['created_by'].initial = request.user
- formset.form.base_fields['cmd'] = forms.JSONField(initial=['-'])
+ formset.form.base_fields['cmd'] = JSONFormField(initial=['-'])
formset.form.base_fields['output'].initial = 'Manually recorded cmd output...'
if obj is not None:
diff --git a/archivebox/core/admin_site.py b/archivebox/core/admin_site.py
index de92db8c..7aea2cf5 100644
--- a/archivebox/core/admin_site.py
+++ b/archivebox/core/admin_site.py
@@ -2,7 +2,7 @@ __package__ = 'archivebox.core'
from django.contrib import admin
-import abx.django.use
+import archivebox
class ArchiveBoxAdmin(admin.AdminSite):
site_header = 'ArchiveBox'
@@ -37,6 +37,6 @@ def register_admin_site():
sites.site = archivebox_admin
# register all plugins admin classes
- abx.django.use.register_admin(archivebox_admin)
+ archivebox.pm.hook.register_admin(admin_site=archivebox_admin)
return archivebox_admin
diff --git a/archivebox/core/apps.py b/archivebox/core/apps.py
index 870a77f8..b516678f 100644
--- a/archivebox/core/apps.py
+++ b/archivebox/core/apps.py
@@ -2,7 +2,7 @@ __package__ = 'archivebox.core'
from django.apps import AppConfig
-import abx
+import archivebox
class CoreConfig(AppConfig):
@@ -10,16 +10,11 @@ class CoreConfig(AppConfig):
def ready(self):
"""Register the archivebox.core.admin_site as the main django admin site"""
+ from django.conf import settings
+ archivebox.pm.hook.ready(settings=settings)
+
from core.admin_site import register_admin_site
register_admin_site()
- abx.pm.hook.ready()
-
-
-@abx.hookimpl
-def register_admin(admin_site):
- """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
- from core.admin import register_admin
- register_admin(admin_site)
diff --git a/archivebox/core/models.py b/archivebox/core/models.py
index 79776b7f..a3962a6a 100644
--- a/archivebox/core/models.py
+++ b/archivebox/core/models.py
@@ -8,21 +8,25 @@ import os
import json
from pathlib import Path
+from datetime import timedelta
from django.db import models
from django.utils.functional import cached_property
from django.utils.text import slugify
+from django.utils import timezone
from django.core.cache import cache
from django.urls import reverse, reverse_lazy
from django.db.models import Case, When, Value, IntegerField
from django.contrib import admin
from django.conf import settings
+from statemachine.mixins import MachineMixin
+
from archivebox.config import CONSTANTS
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
from queues.tasks import bg_archive_snapshot
-# from crawls.models import Crawl
+from crawls.models import Crawl
# from machine.models import Machine, NetworkInterface
from archivebox.misc.system import get_dir_size
@@ -152,7 +156,7 @@ class SnapshotManager(models.Manager):
return super().get_queryset().prefetch_related('tags', 'archiveresult_set') # .annotate(archiveresult_count=models.Count('archiveresult')).distinct()
-class Snapshot(ABIDModel):
+class Snapshot(ABIDModel, MachineMixin):
abid_prefix = 'snp_'
abid_ts_src = 'self.created_at'
abid_uri_src = 'self.url'
@@ -160,6 +164,17 @@ class Snapshot(ABIDModel):
abid_rand_src = 'self.id'
abid_drift_allowed = True
+ state_field_name = 'status'
+ state_machine_name = 'core.statemachines.SnapshotMachine'
+ state_machine_attr = 'sm'
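+    # MachineMixin wires these up: accessing snapshot.sm builds the
+    # SnapshotMachine bound to this instance, persisting state in `status`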
+
+ class SnapshotStatus(models.TextChoices):
+ QUEUED = 'queued', 'Queued'
+ STARTED = 'started', 'Started'
+ SEALED = 'sealed', 'Sealed'
+
+    status = models.CharField(max_length=15, choices=SnapshotStatus.choices, default=SnapshotStatus.QUEUED, null=False, blank=False)
+
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix)
@@ -171,7 +186,7 @@ class Snapshot(ABIDModel):
bookmarked_at = AutoDateTimeField(default=None, null=False, editable=True, db_index=True)
downloaded_at = models.DateTimeField(default=None, null=True, editable=False, db_index=True, blank=True)
- # crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set')
+ crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set')
url = models.URLField(unique=True, db_index=True)
timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
@@ -396,6 +411,25 @@ class Snapshot(ABIDModel):
tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
self.tags.clear()
self.tags.add(*tags_id)
+
+ def has_pending_archiveresults(self) -> bool:
+ pending_statuses = [ArchiveResult.ArchiveResultStatus.QUEUED, ArchiveResult.ArchiveResultStatus.STARTED]
+ pending_archiveresults = self.archiveresult_set.filter(status__in=pending_statuses)
+ return pending_archiveresults.exists()
+
+    def create_pending_archiveresults(self) -> list['ArchiveResult']:
+        archiveresults = []
+        # TODO: EXTRACTORS is not defined in this module; it's expected to come
+        # from the plugin system (e.g. an abx hook listing the enabled extractors)
+        for extractor in EXTRACTORS:
+ archiveresult, _created = ArchiveResult.objects.get_or_create(
+ snapshot=self,
+ extractor=extractor,
+ status=ArchiveResult.ArchiveResultStatus.QUEUED,
+ )
+ archiveresults.append(archiveresult)
+ return archiveresults
+
+ def bump_retry_at(self, seconds: int = 10):
+ self.retry_at = timezone.now() + timedelta(seconds=seconds)
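+        # NOTE: intentionally does not call .save(); callers (see
+        # core.statemachines) save after setting any other fields they need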
# def get_storage_dir(self, create=True, symlink=True) -> Path:
@@ -452,6 +486,20 @@ class ArchiveResult(ABIDModel):
abid_subtype_src = 'self.extractor'
abid_rand_src = 'self.id'
abid_drift_allowed = True
+
+ state_field_name = 'status'
+ state_machine_name = 'core.statemachines.ArchiveResultMachine'
+ state_machine_attr = 'sm'
+
+ class ArchiveResultStatus(models.TextChoices):
+ QUEUED = 'queued', 'Queued'
+ STARTED = 'started', 'Started'
+ SUCCEEDED = 'succeeded', 'Succeeded'
+ FAILED = 'failed', 'Failed'
+ SKIPPED = 'skipped', 'Skipped'
+ BACKOFF = 'backoff', 'Waiting to retry'
+
+ status = models.CharField(max_length=15, choices=ArchiveResultStatus.choices, default=ArchiveResultStatus.QUEUED, null=False, blank=False)
EXTRACTOR_CHOICES = (
('htmltotext', 'htmltotext'),
@@ -469,11 +517,7 @@ class ArchiveResult(ABIDModel):
('title', 'title'),
('wget', 'wget'),
)
- STATUS_CHOICES = [
- ("succeeded", "succeeded"),
- ("failed", "failed"),
- ("skipped", "skipped")
- ]
+
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix)
@@ -491,7 +535,6 @@ class ArchiveResult(ABIDModel):
output = models.CharField(max_length=1024)
start_ts = models.DateTimeField(db_index=True)
end_ts = models.DateTimeField()
- status = models.CharField(max_length=16, choices=STATUS_CHOICES)
# the network interface that was used to download this result
# uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
@@ -552,7 +595,15 @@ class ArchiveResult(ABIDModel):
return link.canonical_outputs().get(f'{self.extractor}_path')
def output_exists(self) -> bool:
- return os.access(self.output_path(), os.R_OK)
+        output_path = self.output_path()
+        return bool(output_path) and os.path.exists(output_path)
+
+ def bump_retry_at(self, seconds: int = 10):
+ self.retry_at = timezone.now() + timedelta(seconds=seconds)
+
+ def create_output_dir(self):
+ snap_dir = self.snapshot_dir
+ snap_dir.mkdir(parents=True, exist_ok=True)
+ return snap_dir / self.output_path()
# def get_storage_dir(self, create=True, symlink=True):
diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index 3810954e..e7d673ac 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -9,13 +9,12 @@ from pathlib import Path
from django.utils.crypto import get_random_string
import abx
-import abx.archivebox
-import abx.archivebox.reads
-import abx.django.use
+import archivebox
-from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
+from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa
+
IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3]
IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ
IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3]
@@ -25,45 +24,8 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v
### ArchiveBox Plugin Settings
################################################################################
-PLUGIN_HOOKSPECS = [
- 'abx.django.hookspec',
- 'abx.pydantic_pkgr.hookspec',
- 'abx.archivebox.hookspec',
-]
-abx.register_hookspecs(PLUGIN_HOOKSPECS)
-
-BUILTIN_PLUGIN_DIRS = {
- 'archivebox': PACKAGE_DIR,
- 'plugins_pkg': PACKAGE_DIR / 'plugins_pkg',
- 'plugins_auth': PACKAGE_DIR / 'plugins_auth',
- 'plugins_search': PACKAGE_DIR / 'plugins_search',
- 'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
-}
-USER_PLUGIN_DIRS = {
- # 'user_plugins': DATA_DIR / 'user_plugins',
-}
-
-# Discover ArchiveBox plugins
-BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
-PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox')
-USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
-ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
-
-# Load ArchiveBox plugins
-PLUGIN_MANAGER = abx.pm
-abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
-PLUGINS = abx.archivebox.reads.get_PLUGINS()
-
-# Load ArchiveBox config from plugins
-CONFIGS = abx.archivebox.reads.get_CONFIGS()
-CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
-BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
-BINARIES = abx.archivebox.reads.get_BINARIES()
-EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
-SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
-# REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
-# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
-
+ALL_PLUGINS = archivebox.ALL_PLUGINS
+LOADED_PLUGINS = archivebox.LOADED_PLUGINS
################################################################################
### Django Core Settings
@@ -102,7 +64,8 @@ INSTALLED_APPS = [
# 'abid_utils', # handles ABID ID creation, handling, and models
'config', # ArchiveBox config settings (loaded as a plugin, don't need to add it here)
'machine', # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc.
- 'queues', # handles starting and managing background workers and processes
+ 'actors', # handles starting and managing background workers and processes (orchestrators and actors)
+ 'queues', # handles starting and managing background workers and processes (supervisord)
'seeds', # handles Seed model and URL source management
'crawls', # handles Crawl and CrawlSchedule models and management
'personas', # handles Persona and session management
@@ -110,7 +73,7 @@ INSTALLED_APPS = [
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
# ArchiveBox plugins
- *abx.django.use.get_INSTALLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
+ *abx.as_list(abx.pm.hook.get_INSTALLED_APPS()), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
# 3rd-party apps from PyPI that need to be loaded last
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
@@ -125,6 +88,7 @@ INSTALLED_APPS = [
+
MIDDLEWARE = [
'core.middleware.TimezoneMiddleware',
'django.middleware.security.SecurityMiddleware',
@@ -135,7 +99,7 @@ MIDDLEWARE = [
'core.middleware.ReverseProxyAuthMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'core.middleware.CacheControlMiddleware',
- *abx.django.use.get_MIDDLEWARES(),
+ *abx.as_list(abx.pm.hook.get_MIDDLEWARES()),
]
@@ -148,7 +112,7 @@ MIDDLEWARE = [
AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend',
- *abx.django.use.get_AUTHENTICATION_BACKENDS(),
+ *abx.as_list(abx.pm.hook.get_AUTHENTICATION_BACKENDS()),
]
@@ -169,7 +133,7 @@ AUTHENTICATION_BACKENDS = [
STATIC_URL = '/static/'
TEMPLATES_DIR_NAME = 'templates'
-CUSTOM_TEMPLATES_ENABLED = os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK) and CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir()
+CUSTOM_TEMPLATES_ENABLED = os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK)
STATICFILES_DIRS = [
*([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_ENABLED else []),
# *[
@@ -177,7 +141,7 @@ STATICFILES_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'static').is_dir()
# ],
- *abx.django.use.get_STATICFILES_DIRS(),
+ *abx.as_list(abx.pm.hook.get_STATICFILES_DIRS()),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
]
@@ -188,7 +152,7 @@ TEMPLATE_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'templates').is_dir()
# ],
- *abx.django.use.get_TEMPLATE_DIRS(),
+ *abx.as_list(abx.pm.hook.get_TEMPLATE_DIRS()),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@@ -228,7 +192,7 @@ SQLITE_CONNECTION_OPTIONS = {
# https://gcollazo.com/optimal-sqlite-settings-for-django/
# https://litestream.io/tips/#busy-timeout
# https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options
- "timeout": 5,
+ "timeout": 10,
"check_same_thread": False,
"transaction_mode": "IMMEDIATE",
"init_command": (
@@ -267,7 +231,7 @@ if not IS_GETTING_VERSION_OR_HELP: # dont create queue.sqlite3 file
HUEY = {
"huey_class": "huey.SqliteHuey",
"filename": CONSTANTS.QUEUE_DATABASE_FILENAME,
- "name": "system_tasks",
+ "name": "commands",
"results": True,
"store_none": True,
"immediate": False,
@@ -288,11 +252,11 @@ if not IS_GETTING_VERSION_OR_HELP: # dont create queue.sqlite3 file
# https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
# https://github.com/gaiacoop/django-huey
DJANGO_HUEY = {
- "default": "system_tasks",
+ "default": "commands",
"queues": {
HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register()
- **abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME),
+ **abx.as_dict(abx.pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME)),
},
}
@@ -517,7 +481,7 @@ ADMIN_DATA_VIEWS = {
"name": "log",
},
},
- *abx.django.use.get_ADMIN_DATA_VIEWS_URLS(),
+ *abx.as_list(abx.pm.hook.get_ADMIN_DATA_VIEWS_URLS()),
],
}
@@ -611,7 +575,4 @@ if DEBUG_REQUESTS_TRACKER:
# JET_TOKEN = 'some-api-token-here'
-abx.django.use.register_checks()
-# abx.archivebox.reads.register_all_hooks(globals())
-
# import ipdb; ipdb.set_trace()
diff --git a/archivebox/core/settings_logging.py b/archivebox/core/settings_logging.py
index d9fc28bd..d292e15a 100644
--- a/archivebox/core/settings_logging.py
+++ b/archivebox/core/settings_logging.py
@@ -163,11 +163,6 @@ SETTINGS_LOGGING = {
"level": "DEBUG",
"propagate": False,
},
- "plugins_extractor": {
- "handlers": ["default", "logfile"],
- "level": "DEBUG",
- "propagate": False,
- },
"httpx": {
"handlers": ["outbound_webhooks"],
"level": "INFO",
diff --git a/archivebox/core/statemachines.py b/archivebox/core/statemachines.py
new file mode 100644
index 00000000..a2425d43
--- /dev/null
+++ b/archivebox/core/statemachines.py
@@ -0,0 +1,115 @@
+__package__ = 'archivebox.core'
+
+from django.utils import timezone
+
+from statemachine import State, StateMachine
+
+from core.models import Snapshot, ArchiveResult
+
+# State Machine Definitions
+#################################################
+
+
+class SnapshotMachine(StateMachine, strict_states=True):
+ """State machine for managing Snapshot lifecycle."""
+
+ model: Snapshot
+
+ # States
+ queued = State(value=Snapshot.SnapshotStatus.QUEUED, initial=True)
+ started = State(value=Snapshot.SnapshotStatus.STARTED)
+ sealed = State(value=Snapshot.SnapshotStatus.SEALED, final=True)
+
+ # Tick Event
+ tick = (
+ queued.to.itself(unless='can_start', internal=True) |
+ queued.to(started, cond='can_start') |
+ started.to.itself(unless='is_finished', internal=True) |
+ started.to(sealed, cond='is_finished')
+ )
+
+ def __init__(self, snapshot, *args, **kwargs):
+ self.snapshot = snapshot
+ super().__init__(snapshot, *args, **kwargs)
+
+    def can_start(self) -> bool:
+        return bool(self.snapshot.seed and self.snapshot.seed.uri)
+
+ def is_finished(self) -> bool:
+ return not self.snapshot.has_pending_archiveresults()
+
+ def on_started(self):
+ self.snapshot.create_pending_archiveresults()
+ self.snapshot.bump_retry_at(seconds=60)
+ self.snapshot.save()
+
+ def on_sealed(self):
+ self.snapshot.retry_at = None
+ self.snapshot.save()
+
+class ArchiveResultMachine(StateMachine, strict_states=True):
+ """State machine for managing ArchiveResult lifecycle."""
+
+ model: ArchiveResult
+
+ # States
+ queued = State(value=ArchiveResult.ArchiveResultStatus.QUEUED, initial=True)
+ started = State(value=ArchiveResult.ArchiveResultStatus.STARTED)
+ backoff = State(value=ArchiveResult.ArchiveResultStatus.BACKOFF)
+ succeeded = State(value=ArchiveResult.ArchiveResultStatus.SUCCEEDED, final=True)
+ failed = State(value=ArchiveResult.ArchiveResultStatus.FAILED, final=True)
+
+ # Tick Event
+ tick = (
+ queued.to.itself(unless='can_start', internal=True) |
+ queued.to(started, cond='can_start') |
+ started.to.itself(unless='is_finished', internal=True) |
+ started.to(succeeded, cond='is_succeeded') |
+ started.to(failed, cond='is_failed') |
+ started.to(backoff, cond='is_backoff') |
+ backoff.to.itself(unless='can_start', internal=True) |
+ backoff.to(started, cond='can_start') |
+ backoff.to(succeeded, cond='is_succeeded') |
+ backoff.to(failed, cond='is_failed')
+ )
+
+ def __init__(self, archiveresult, *args, **kwargs):
+ self.archiveresult = archiveresult
+ super().__init__(archiveresult, *args, **kwargs)
+
+    def can_start(self) -> bool:
+        return bool(self.archiveresult.snapshot and self.archiveresult.snapshot.is_started())
+
+ def is_succeeded(self) -> bool:
+ return self.archiveresult.output_exists()
+
+ def is_failed(self) -> bool:
+ return not self.archiveresult.output_exists()
+
+ def is_backoff(self) -> bool:
+ return self.archiveresult.status == ArchiveResult.ArchiveResultStatus.BACKOFF
+
+ def on_started(self):
+ self.archiveresult.start_ts = timezone.now()
+ self.archiveresult.create_output_dir()
+ self.archiveresult.bump_retry_at(seconds=60)
+ self.archiveresult.save()
+
+ def on_backoff(self):
+ self.archiveresult.bump_retry_at(seconds=60)
+ self.archiveresult.save()
+
+ def on_succeeded(self):
+ self.archiveresult.end_ts = timezone.now()
+ self.archiveresult.save()
+
+ def on_failed(self):
+ self.archiveresult.end_ts = timezone.now()
+ self.archiveresult.save()
+
+ def after_transition(self, event: str, source: State, target: State):
+ print(f"after '{event}' from '{source.id}' to '{target.id}'")
+ # self.archiveresult.save_merkle_index()
+ # self.archiveresult.save_html_index()
+ # self.archiveresult.save_json_index()
+ return "after_transition"
diff --git a/archivebox/core/views.py b/archivebox/core/views.py
index d423c146..e425c8fe 100644
--- a/archivebox/core/views.py
+++ b/archivebox/core/views.py
@@ -12,7 +12,6 @@ from django.views import View
from django.views.generic.list import ListView
from django.views.generic import FormView
from django.db.models import Q
-from django.conf import settings
from django.contrib import messages
from django.contrib.auth.mixins import UserPassesTestMixin
from django.views.decorators.csrf import csrf_exempt
@@ -21,6 +20,7 @@ from django.utils.decorators import method_decorator
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
+import archivebox
from core.models import Snapshot
from core.forms import AddLinkForm
@@ -32,9 +32,8 @@ from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG
from archivebox.misc.util import base_url, htmlencode, ts_to_date_str
from archivebox.misc.serve_static import serve_static_with_byterange_support
-from ..plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
-from ..logging_util import printable_filesize
-from ..search import query_search_index
+from archivebox.logging_util import printable_filesize
+from archivebox.search import query_search_index
class HomepageView(View):
@@ -69,7 +68,7 @@ class SnapshotView(View):
and embed_path
and os.access(abs_path, os.R_OK)
and abs_path.exists()):
- if abs_path.is_dir() and not any(abs_path.glob('*.*')):
+ if os.path.isdir(abs_path) and not any(abs_path.glob('*.*')):
continue
result_info = {
@@ -103,7 +102,7 @@ class SnapshotView(View):
        # iterate through all the files in the snapshot dir and add the biggest ones to the result list
snap_dir = Path(snapshot.link_dir)
- assert os.access(snap_dir, os.R_OK) and os.access(snap_dir, os.X_OK)
+ assert os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK)
for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
extension = result_file.suffix.lstrip('.').lower()
@@ -154,7 +153,7 @@ class SnapshotView(View):
'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
'warc_path': warc_path,
- 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
+ 'SAVE_ARCHIVE_DOT_ORG': archivebox.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']),
'best_result': best_result,
@@ -500,21 +499,25 @@ class HealthCheckView(View):
def find_config_section(key: str) -> str:
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+
if key in CONSTANTS_CONFIG:
return 'CONSTANT'
matching_sections = [
- section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields
+ section_id for section_id, section in CONFIGS.items() if key in section.model_fields
]
section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section
def find_config_default(key: str) -> str:
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+
if key in CONSTANTS_CONFIG:
return str(CONSTANTS_CONFIG[key])
default_val = None
- for config in settings.CONFIGS.values():
+ for config in CONFIGS.values():
if key in config.model_fields:
default_val = config.model_fields[key].default
break
@@ -530,7 +533,9 @@ def find_config_default(key: str) -> str:
return default_val
def find_config_type(key: str) -> str:
- for config in settings.CONFIGS.values():
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+
+ for config in CONFIGS.values():
if hasattr(config, key):
type_hints = get_type_hints(config)
try:
@@ -547,7 +552,8 @@ def key_is_safe(key: str) -> bool:
@render_with_table_view
def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
-
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
rows = {
@@ -560,7 +566,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
# "Aliases": [],
}
- for section_id, section in reversed(list(settings.CONFIGS.items())):
+ for section_id, section in reversed(list(CONFIGS.items())):
for key, field in section.model_fields.items():
rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
@@ -570,7 +576,6 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
# rows['Documentation'].append(mark_safe(f'Wiki: {key}'))
# rows['Aliases'].append(', '.join(find_config_aliases(key)))
-
section = 'CONSTANT'
for key in CONSTANTS_CONFIG.keys():
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
@@ -589,7 +594,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
@render_with_item_view
def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
-
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
+ FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
+
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
# aliases = USER_CONFIG.get(key, {}).get("aliases", [])
@@ -597,7 +604,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
if key in CONSTANTS_CONFIG:
        section_header = mark_safe(f'[CONSTANTS] {key} (read-only, hardcoded by ArchiveBox)')
- elif key in settings.FLAT_CONFIG:
+ elif key in FLAT_CONFIG:
        section_header = mark_safe(f'data / ArchiveBox.conf [{find_config_section(key)}] {key}')
else:
        section_header = mark_safe(f'[DYNAMIC CONFIG] {key} (read-only, calculated at runtime)')
@@ -613,7 +620,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
"fields": {
'Key': key,
'Type': find_config_type(key),
- 'Value': settings.FLAT_CONFIG.get(key, settings.CONFIGS.get(key, None)) if key_is_safe(key) else '********',
+ 'Value': FLAT_CONFIG.get(key, CONFIGS.get(key, None)) if key_is_safe(key) else '********',
},
"help_texts": {
'Key': mark_safe(f'''
@@ -635,13 +642,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
{find_config_default(key) or '↗️ See in ArchiveBox source code...'}
-
+
To change this value, edit data/ArchiveBox.conf
or run:
archivebox config --set {key}="{
val.strip("'")
if (val := find_config_default(key)) else
- (repr(settings.FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
+ (repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
}"
'''),
diff --git a/archivebox/crawls/actors.py b/archivebox/crawls/actors.py
new file mode 100644
index 00000000..f159956e
--- /dev/null
+++ b/archivebox/crawls/actors.py
@@ -0,0 +1,69 @@
+__package__ = 'archivebox.crawls'
+
+from typing import ClassVar
+
+from rich import print
+
+from django.db.models import QuerySet
+
+from crawls.models import Crawl
+
+from actors.actor import ActorType
+
+
+class CrawlActor(ActorType[Crawl]):
+
+ QUERYSET: ClassVar[QuerySet] = Crawl.objects.filter(status='queued')
+ CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue
+ CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue
+ CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue
+ CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue
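+
+    # rough shape of the atomic claim these clauses compose into (a sketch of
+    # the generic ActorType query, not copied from its implementation):
+    #   UPDATE crawl SET status = "started"
+    #   WHERE id IN (SELECT id FROM crawl WHERE status = "queued"
+    #                ORDER BY created_at DESC LIMIT 50)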
+
+ # model_type: Type[ModelType]
+ MAX_CONCURRENT_ACTORS: ClassVar[int] = 4 # min 2, max 8, up to 60% of available cpu cores
+ MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object
+
+ def claim_sql_where(self) -> str:
+ """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """
+ return self.CLAIM_WHERE
+
+ def claim_sql_set(self) -> str:
+ """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """
+ return self.CLAIM_SET
+
+ def claim_sql_order(self) -> str:
+ """override this to implement a custom ORDER BY clause for the atomic claim step e.g. "created_at DESC" """
+ return self.CLAIM_ORDER
+
+ def claim_from_top(self) -> int:
+ """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue"""
+ return self.CLAIM_FROM_TOP
+
+ def tick(self, obj: Crawl) -> None:
+ """override this to process the object"""
+ print(f'[blue]🏃♂️ {self}.tick()[/blue]', obj.abid or obj.id)
+ # For example:
+ # do_some_task(obj)
+ # do_something_else(obj)
+ # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success')
+ # raise NotImplementedError('tick() must be implemented by the Actor subclass')
+
+ def on_shutdown(self, err: BaseException | None=None) -> None:
+ print(f'[grey53]🏃♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+ # abx.pm.hook.on_actor_shutdown(self)
+
+ def on_tick_start(self, obj: Crawl) -> None:
+ # print(f'🏃♂️ {self}.on_tick_start()', obj.abid or obj.id)
+ # abx.pm.hook.on_actor_tick_start(self, obj_to_process)
+ # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ')
+ pass
+
+ def on_tick_end(self, obj: Crawl) -> None:
+ # print(f'🏃♂️ {self}.on_tick_end()', obj.abid or obj.id)
+ # abx.pm.hook.on_actor_tick_end(self, obj_to_process)
+ # self.timer.end()
+ pass
+
+ def on_tick_exception(self, obj: Crawl, err: BaseException) -> None:
+ print(f'[red]🏃♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err)
+ # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err)
diff --git a/archivebox/crawls/models.py b/archivebox/crawls/models.py
index a806d889..ff9e0d0a 100644
--- a/archivebox/crawls/models.py
+++ b/archivebox/crawls/models.py
@@ -1,13 +1,20 @@
__package__ = 'archivebox.crawls'
+from typing import TYPE_CHECKING
from django_stubs_ext.db.models import TypedModelMeta
+from datetime import timedelta
+
from django.db import models
-from django.db.models import Q
from django.core.validators import MaxValueValidator, MinValueValidator
from django.conf import settings
-from django.utils import timezone
from django.urls import reverse_lazy
+from django.utils import timezone
+
+from statemachine.mixins import MachineMixin
+
+if TYPE_CHECKING:
+ from core.models import Snapshot
from seeds.models import Seed
@@ -41,8 +48,9 @@ class CrawlSchedule(ABIDModel, ModelWithHealthStats):
"""The base crawl that each new scheduled job should copy as a template"""
return self.crawl_set.first()
+
-class Crawl(ABIDModel, ModelWithHealthStats):
+class Crawl(ABIDModel, ModelWithHealthStats, MachineMixin):
"""
A single session of URLs to archive starting from a given Seed and expanding outwards. An "archiving session" so to speak.
@@ -55,16 +63,29 @@ class Crawl(ABIDModel, ModelWithHealthStats):
abid_prefix = 'crl_'
abid_ts_src = 'self.created_at'
abid_uri_src = 'self.seed.uri'
- abid_subtype_src = 'self.persona_id'
+ abid_subtype_src = 'self.persona'
abid_rand_src = 'self.id'
abid_drift_allowed = True
+
+ state_field_name = 'status'
+ state_machine_name = 'crawls.statemachines.CrawlMachine'
+ state_machine_attr = 'sm'
+ bind_events_as_methods = True
+ class CrawlStatus(models.TextChoices):
+ QUEUED = 'queued', 'Queued'
+ STARTED = 'started', 'Started'
+ SEALED = 'sealed', 'Sealed'
+
+ status = models.CharField(choices=CrawlStatus.choices, max_length=15, default=CrawlStatus.QUEUED, null=False, blank=False)
+
id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
abid = ABIDField(prefix=abid_prefix)
created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='crawl_set')
created_at = AutoDateTimeField(default=None, null=False, db_index=True)
modified_at = models.DateTimeField(auto_now=True)
+
seed = models.ForeignKey(Seed, on_delete=models.PROTECT, related_name='crawl_set', null=False, blank=False)
max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)])
@@ -79,7 +100,7 @@ class Crawl(ABIDModel, ModelWithHealthStats):
# schedule = models.JSONField()
# config = models.JSONField()
- # snapshot_set: models.Manager['Snapshot']
+ snapshot_set: models.Manager['Snapshot']
class Meta(TypedModelMeta):
@@ -102,6 +123,28 @@ class Crawl(ABIDModel, ModelWithHealthStats):
@property
def api_docs_url(self) -> str:
return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
+
+ def has_pending_archiveresults(self) -> bool:
+ from core.models import ArchiveResult
+
+ pending_statuses = [ArchiveResult.ArchiveResultStatus.QUEUED, ArchiveResult.ArchiveResultStatus.STARTED]
+
+ snapshot_ids = self.snapshot_set.values_list('id', flat=True)
+ pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, status__in=pending_statuses)
+ return pending_archiveresults.exists()
+
+ def create_root_snapshot(self) -> 'Snapshot':
+ from core.models import Snapshot
+
+ root_snapshot, _ = Snapshot.objects.get_or_create(
+ crawl=self,
+ url=self.seed.uri,
+ )
+ return root_snapshot
+
+ def bump_retry_at(self, seconds: int = 10):
+ self.retry_at = timezone.now() + timedelta(seconds=seconds)
+ self.save()
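+
+    # hypothetical flow: while a crawl is in progress the state machine keeps
+    # calling bump_retry_at() to push the next re-check ~10s out, then clears
+    # retry_at once the crawl is sealed (see crawls/statemachines.py below)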
class Outlink(models.Model):
diff --git a/archivebox/crawls/statemachines.py b/archivebox/crawls/statemachines.py
new file mode 100644
index 00000000..b7e43daf
--- /dev/null
+++ b/archivebox/crawls/statemachines.py
@@ -0,0 +1,48 @@
+__package__ = 'archivebox.crawls'
+
+from statemachine import State, StateMachine
+
+from crawls.models import Crawl
+
+# State Machine Definitions
+#################################################
+
+
+class CrawlMachine(StateMachine, strict_states=True):
+ """State machine for managing Crawl lifecycle."""
+
+ model: Crawl
+
+ # States
+ queued = State(value=Crawl.CrawlStatus.QUEUED, initial=True)
+ started = State(value=Crawl.CrawlStatus.STARTED)
+ sealed = State(value=Crawl.CrawlStatus.SEALED, final=True)
+
+ # Tick Event
+ tick = (
+ queued.to.itself(unless='can_start', internal=True) |
+ queued.to(started, cond='can_start') |
+ started.to.itself(unless='is_finished', internal=True) |
+ started.to(sealed, cond='is_finished')
+ )
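+
+    # each call to tick() re-evaluates the guards: a queued crawl stays queued
+    # until can_start() passes, then a started crawl stays started until
+    # is_finished() passes and it seals. Hypothetical driver (using the
+    # state_machine_attr defined on Crawl): crawl.sm.tick()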
+
+ def __init__(self, crawl, *args, **kwargs):
+ self.crawl = crawl
+ super().__init__(crawl, *args, **kwargs)
+
+ def can_start(self) -> bool:
+        return bool(self.crawl.seed and self.crawl.seed.uri)
+
+ def is_finished(self) -> bool:
+ return not self.crawl.has_pending_archiveresults()
+
+ def on_started(self):
+ self.crawl.create_root_snapshot()
+ self.crawl.bump_retry_at(seconds=10)
+ self.crawl.save()
+
+ def on_sealed(self):
+ self.crawl.retry_at = None
+ self.crawl.save()
diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py
index 07ebb415..42f9d6c7 100644
--- a/archivebox/extractors/__init__.py
+++ b/archivebox/extractors/__init__.py
@@ -27,43 +27,29 @@ from ..logging_util import (
log_archive_method_finished,
)
-from .title import should_save_title, save_title
-from .favicon import should_save_favicon, save_favicon
-from .wget import should_save_wget, save_wget
-from .singlefile import should_save_singlefile, save_singlefile
-from .readability import should_save_readability, save_readability
-from .mercury import should_save_mercury, save_mercury
-from .htmltotext import should_save_htmltotext, save_htmltotext
-from .pdf import should_save_pdf, save_pdf
-from .screenshot import should_save_screenshot, save_screenshot
-from .dom import should_save_dom, save_dom
-from .git import should_save_git, save_git
-from .media import should_save_media, save_media
-from .archive_org import should_save_archive_dot_org, save_archive_dot_org
-from .headers import should_save_headers, save_headers
-
ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool]
SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult]
ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction]
def get_default_archive_methods() -> List[ArchiveMethodEntry]:
+ # TODO: move to abx.pm.hook.get_EXTRACTORS()
return [
- ('favicon', should_save_favicon, save_favicon),
- ('headers', should_save_headers, save_headers),
- ('singlefile', should_save_singlefile, save_singlefile),
- ('pdf', should_save_pdf, save_pdf),
- ('screenshot', should_save_screenshot, save_screenshot),
- ('dom', should_save_dom, save_dom),
- ('wget', should_save_wget, save_wget),
- # keep title, readability, and htmltotext below wget and singlefile, as they depend on them
- ('title', should_save_title, save_title),
- ('readability', should_save_readability, save_readability),
- ('mercury', should_save_mercury, save_mercury),
- ('htmltotext', should_save_htmltotext, save_htmltotext),
- ('git', should_save_git, save_git),
- ('media', should_save_media, save_media),
- ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
+ # ('favicon', should_save_favicon, save_favicon),
+ # ('headers', should_save_headers, save_headers),
+ # ('singlefile', should_save_singlefile, save_singlefile),
+ # ('pdf', should_save_pdf, save_pdf),
+ # ('screenshot', should_save_screenshot, save_screenshot),
+ # ('dom', should_save_dom, save_dom),
+ # ('wget', should_save_wget, save_wget),
+ # # keep title, readability, and htmltotext below wget and singlefile, as they depend on them
+ # ('title', should_save_title, save_title),
+ # ('readability', should_save_readability, save_readability),
+ # ('mercury', should_save_mercury, save_mercury),
+ # ('htmltotext', should_save_htmltotext, save_htmltotext),
+ # ('git', should_save_git, save_git),
+ # ('media', should_save_media, save_media),
+ # ('archive_org', should_save_archive_dot_org, save_archive_dot_org),
]
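+    # (extractors are being migrated to ABX plugins; see the commented-out
+    # get_EXTRACTORS() hookimpls in the abx-plugin-* packages later in this diff)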
ARCHIVE_METHODS_INDEXING_PRECEDENCE = [
diff --git a/archivebox/index/html.py b/archivebox/index/html.py
index eae93e67..24cad5c0 100644
--- a/archivebox/index/html.py
+++ b/archivebox/index/html.py
@@ -8,6 +8,8 @@ from typing import List, Optional, Iterator, Mapping
from django.utils.html import format_html, mark_safe # type: ignore
from django.core.cache import cache
+import abx
+
from archivebox.misc.system import atomic_write
from archivebox.misc.util import (
enforce_types,
@@ -19,7 +21,6 @@ from archivebox.misc.util import (
from archivebox.config import CONSTANTS, DATA_DIR, VERSION
from archivebox.config.common import SERVER_CONFIG
from archivebox.config.version import get_COMMIT_HASH
-from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG
from .schema import Link
from ..logging_util import printable_filesize
@@ -79,8 +80,10 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None:
@enforce_types
def link_details_template(link: Link) -> str:
-
- from ..extractors.wget import wget_output_path
+
+ from abx_plugin_wget_extractor.wget import wget_output_path
+
+ SAVE_ARCHIVE_DOT_ORG = abx.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG
link_info = link._asdict(extended=True)
@@ -102,7 +105,7 @@ def link_details_template(link: Link) -> str:
'status': 'archived' if link.is_archived else 'not yet archived',
'status_color': 'success' if link.is_archived else 'danger',
'oldest_archive_date': ts_to_date_str(link.oldest_archive_date),
- 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG,
+ 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG,
'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS,
})
diff --git a/archivebox/index/json.py b/archivebox/index/json.py
index 8671369a..0a484c75 100644
--- a/archivebox/index/json.py
+++ b/archivebox/index/json.py
@@ -8,6 +8,8 @@ from pathlib import Path
from datetime import datetime, timezone
from typing import List, Optional, Iterator, Any, Union
+import abx
+
from archivebox.config import VERSION, DATA_DIR, CONSTANTS
from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG
@@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types
@enforce_types
def generate_json_index_from_links(links: List[Link], with_headers: bool):
- from django.conf import settings
-
MAIN_INDEX_HEADER = {
'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.',
'schema': 'archivebox.index.json',
@@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool):
'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki',
'source': 'https://github.com/ArchiveBox/ArchiveBox',
'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues',
- 'dependencies': settings.BINARIES,
+ 'dependencies': dict(abx.pm.hook.get_BINARIES()),
},
}
-
if with_headers:
output = {
**MAIN_INDEX_HEADER,
diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py
index a3c0e967..78e80ef9 100644
--- a/archivebox/index/schema.py
+++ b/archivebox/index/schema.py
@@ -17,9 +17,9 @@ from dataclasses import dataclass, asdict, field, fields
from django.utils.functional import cached_property
-from archivebox.config import ARCHIVE_DIR, CONSTANTS
+import abx
-from plugins_extractor.favicon.config import FAVICON_CONFIG
+from archivebox.config import ARCHIVE_DIR, CONSTANTS
from archivebox.misc.system import get_dir_size
from archivebox.misc.util import ts_to_date_str, parse_date
@@ -426,7 +426,10 @@ class Link:
def canonical_outputs(self) -> Dict[str, Optional[str]]:
"""predict the expected output paths that should be present after archiving"""
- from ..extractors.wget import wget_output_path
+ from abx_plugin_wget.wget import wget_output_path
+
+ FAVICON_CONFIG = abx.pm.hook.get_CONFIGS().favicon
+
# TODO: banish this awful duplication from the codebase and import these
# from their respective extractor files
canonical = {
diff --git a/archivebox/machine/models.py b/archivebox/machine/models.py
index 229e1d83..7686b73e 100644
--- a/archivebox/machine/models.py
+++ b/archivebox/machine/models.py
@@ -8,9 +8,10 @@ from django.db import models
from django.utils import timezone
from django.utils.functional import cached_property
-import abx.archivebox.reads
+import abx
+import archivebox
-from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
+from pydantic_pkgr import Binary, BinProvider
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
@@ -180,7 +181,7 @@ class NetworkInterface(ABIDModel, ModelWithHealthStats):
class InstalledBinaryManager(models.Manager):
- def get_from_db_or_cache(self, binary: BaseBinary) -> 'InstalledBinary':
+ def get_from_db_or_cache(self, binary: Binary) -> 'InstalledBinary':
"""Get or create an InstalledBinary record for a Binary on the local machine"""
global _CURRENT_BINARIES
@@ -216,7 +217,7 @@ class InstalledBinaryManager(models.Manager):
# if binary was not yet loaded from filesystem, do it now
# this is expensive, we have to find its abspath, version, and sha256, but it's necessary
# to make sure we have a good, up-to-date record of it in the DB & in-memory cache
- binary = binary.load(fresh=True)
+ binary = archivebox.pm.hook.binary_load(binary=binary, fresh=True)
assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256'
@@ -291,8 +292,8 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
if not hasattr(self, 'machine'):
self.machine = Machine.objects.current()
if not self.binprovider:
- all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values())
- binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True)
+ all_known_binproviders = list(abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values())
+ binary = archivebox.pm.hook.binary_load(binary=Binary(name=self.name, binproviders=all_known_binproviders), fresh=True)
self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
if not self.abspath:
self.abspath = self.BINPROVIDER.get_abspath(self.name)
@@ -304,16 +305,16 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
super().clean(*args, **kwargs)
@cached_property
- def BINARY(self) -> BaseBinary:
- for binary in abx.archivebox.reads.get_BINARIES().values():
+ def BINARY(self) -> Binary:
+ for binary in abx.as_dict(archivebox.pm.hook.get_BINARIES()).values():
if binary.name == self.name:
return binary
raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
# TODO: we could technically reconstruct it from scratch, but why would we ever want to do that?
@cached_property
- def BINPROVIDER(self) -> BaseBinProvider:
- for binprovider in abx.archivebox.reads.get_BINPROVIDERS().values():
+ def BINPROVIDER(self) -> BinProvider:
+ for binprovider in abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values():
if binprovider.name == self.binprovider:
return binprovider
raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')
@@ -321,7 +322,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
# maybe not a good idea to provide this? Binary in DB is a record of the binary's config
# whereas a loaded binary is a not-yet saved instance that may not have the same config
# why would we want to load a binary record from the db when it could be freshly loaded?
- def load_from_db(self) -> BaseBinary:
+ def load_from_db(self) -> Binary:
# TODO: implement defaults arg in pydantic_pkgr
# return self.BINARY.load(defaults={
# 'binprovider': self.BINPROVIDER,
@@ -330,7 +331,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
# 'sha256': self.sha256,
# })
- return BaseBinary.model_validate({
+ return Binary.model_validate({
**self.BINARY.model_dump(),
'abspath': self.abspath and Path(self.abspath),
'version': self.version,
@@ -340,5 +341,5 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
'overrides': self.BINARY.overrides,
})
- def load_fresh(self) -> BaseBinary:
- return self.BINARY.load(fresh=True)
+ def load_fresh(self) -> Binary:
+ return archivebox.pm.hook.binary_load(binary=self.BINARY, fresh=True)
diff --git a/archivebox/main.py b/archivebox/main.py
index a3db809f..9ce0b9bd 100755
--- a/archivebox/main.py
+++ b/archivebox/main.py
@@ -14,6 +14,10 @@ from crontab import CronTab, CronSlices
from django.db.models import QuerySet
from django.utils import timezone
+from pydantic_pkgr import Binary
+
+import abx
+import archivebox
from archivebox.misc.checks import check_data_folder
from archivebox.misc.util import enforce_types # type: ignore
from archivebox.misc.system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT
@@ -22,7 +26,7 @@ from archivebox.misc.logging import stderr, hint
from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR
from archivebox.config.common import SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG
from archivebox.config.permissions import SudoPermission, IN_DOCKER
-from archivebox.config.configfile import (
+from archivebox.config.collection import (
write_config_file,
load_all_config,
get_real_name,
@@ -195,15 +199,13 @@ def version(quiet: bool=False,
console = Console()
prnt = console.print
- from django.conf import settings
-
- from abx.archivebox.base_binary import BaseBinary, apt, brew, env
+ from abx_plugin_default_binproviders import apt, brew, env
from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID
from archivebox.config.paths import get_data_locations, get_code_locations
- from plugins_auth.ldap.config import LDAP_CONFIG
+ LDAP_ENABLED = archivebox.pm.hook.get_SCOPE_CONFIG().LDAP_ENABLED
# 0.7.1
@@ -242,7 +244,7 @@ def version(quiet: bool=False,
f'SUDO={CONSTANTS.IS_ROOT}',
f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}',
f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}',
- f'LDAP={LDAP_CONFIG.LDAP_ENABLED}',
+ f'LDAP={LDAP_ENABLED}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
)
prnt()
@@ -264,7 +266,8 @@ def version(quiet: bool=False,
prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]')
failures = []
- for name, binary in list(settings.BINARIES.items()):
+ BINARIES = abx.as_dict(archivebox.pm.hook.get_BINARIES())
+ for name, binary in list(BINARIES.items()):
if binary.name == 'archivebox':
continue
@@ -295,14 +298,15 @@ def version(quiet: bool=False,
prnt()
prnt('[gold3][i] Package Managers:[/gold3]')
- for name, binprovider in list(settings.BINPROVIDERS.items()):
+ BINPROVIDERS = abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS())
+ for name, binprovider in list(BINPROVIDERS.items()):
err = None
if binproviders and binprovider.name not in binproviders:
continue
# TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN
- loaded_bin = binprovider.INSTALLER_BINARY or BaseBinary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
+ loaded_bin = binprovider.INSTALLER_BINARY or Binary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew])
abspath = None
if loaded_bin.abspath:
@@ -1050,9 +1054,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
# - recommend user re-run with sudo if any deps need to be installed as root
from rich import print
- from django.conf import settings
- from archivebox import CONSTANTS
from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from archivebox.config.paths import get_or_create_working_lib_dir
@@ -1075,11 +1077,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
package_manager_names = ', '.join(
f'[yellow]{binprovider.name}[/yellow]'
- for binprovider in list(settings.BINPROVIDERS.values())
+ for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values()))
if not binproviders or (binproviders and binprovider.name in binproviders)
)
print(f'[+] Setting up package managers {package_manager_names}...')
- for binprovider in list(settings.BINPROVIDERS.values()):
+ for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())):
if binproviders and binprovider.name not in binproviders:
continue
try:
@@ -1092,7 +1094,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
print()
- for binary in list(settings.BINARIES.values()):
+ for binary in reversed(list(abx.as_dict(abx.pm.hook.get_BINARIES()).values())):
if binary.name in ('archivebox', 'django', 'sqlite', 'python'):
# obviously must already be installed if we are running
continue
@@ -1122,7 +1124,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
sys.stderr.write("\033[00m\n") # reset
else:
- result = binary.load_or_install(binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
+ loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False)
+ result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
if result and result['loaded_version']:
break
except Exception as e:
@@ -1133,7 +1136,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
sys.stderr.write("\033[00m\n") # reset
else:
- binary.load_or_install(fresh=True, dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
+ loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, fresh=True, dry_run=dry_run)
+ result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'})
if IS_ROOT and LIB_DIR:
with SudoPermission(uid=0):
if ARCHIVEBOX_USER == 0:
@@ -1157,7 +1161,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina
print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr)
- from plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY
+ from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
extra_args = []
if binproviders:
@@ -1183,8 +1187,6 @@ def config(config_options_str: Optional[str]=None,
out_dir: Path=DATA_DIR) -> None:
"""Get and set your ArchiveBox project configuration values"""
- import abx.archivebox.reads
-
from rich import print
check_data_folder()
@@ -1198,7 +1200,8 @@ def config(config_options_str: Optional[str]=None,
elif config_options_str:
config_options = config_options_str.split('\n')
- from django.conf import settings
+ FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
+ CONFIGS = archivebox.pm.hook.get_CONFIGS()
config_options = config_options or []
@@ -1208,8 +1211,8 @@ def config(config_options_str: Optional[str]=None,
if search:
if config_options:
config_options = [get_real_name(key) for key in config_options]
- matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
- for config_section in settings.CONFIGS.values():
+ matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
+ for config_section in CONFIGS.values():
aliases = config_section.aliases
for search_key in config_options:
@@ -1228,15 +1231,15 @@ def config(config_options_str: Optional[str]=None,
elif get or no_args:
if config_options:
config_options = [get_real_name(key) for key in config_options]
- matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG}
- failed_config = [key for key in config_options if key not in settings.FLAT_CONFIG]
+ matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG}
+ failed_config = [key for key in config_options if key not in FLAT_CONFIG]
if failed_config:
stderr()
stderr('[X] These options failed to get', color='red')
stderr(' {}'.format('\n '.join(config_options)))
raise SystemExit(1)
else:
- matching_config = settings.FLAT_CONFIG
+ matching_config = FLAT_CONFIG
print(printable_config(matching_config))
raise SystemExit(not matching_config)
@@ -1257,20 +1260,20 @@ def config(config_options_str: Optional[str]=None,
if key != raw_key:
stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow')
- if key in settings.FLAT_CONFIG:
+ if key in FLAT_CONFIG:
new_config[key] = val.strip()
else:
failed_options.append(line)
if new_config:
- before = settings.FLAT_CONFIG
+ before = FLAT_CONFIG
matching_config = write_config_file(new_config)
- after = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()}
+ after = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()}
print(printable_config(matching_config))
side_effect_changes = {}
for key, val in after.items():
- if key in settings.FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
+ if key in FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config):
side_effect_changes[key] = after[key]
# import ipdb; ipdb.set_trace()
@@ -1312,7 +1315,7 @@ def schedule(add: bool=False,
"""Set ArchiveBox to regularly import URLs at specific times using cron"""
check_data_folder()
- from archivebox.plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY
+ from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY
from archivebox.config.permissions import USER
Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True)
diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py
index b0322a1e..8a2894fe 100644
--- a/archivebox/misc/checks.py
+++ b/archivebox/misc/checks.py
@@ -201,6 +201,7 @@ def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True):
def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True):
+ import archivebox
from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP
from archivebox.misc.logging import STDERR
from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir
@@ -209,6 +210,8 @@ def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_ex
lib_dir = lib_dir or STORAGE_CONFIG.LIB_DIR
+ assert lib_dir == archivebox.pm.hook.get_LIB_DIR(), "lib_dir is not the same as the one in the flat config"
+
if not must_exist and not os.path.isdir(lib_dir):
return True
diff --git a/archivebox/misc/shell_welcome_message.py b/archivebox/misc/shell_welcome_message.py
index 5b85e6bd..26314dc0 100644
--- a/archivebox/misc/shell_welcome_message.py
+++ b/archivebox/misc/shell_welcome_message.py
@@ -23,7 +23,7 @@ from archivebox import CONSTANTS # noqa
from ..main import * # noqa
from ..cli import CLI_SUBCOMMANDS
-CONFIG = settings.FLAT_CONFIG
+CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys())
if __name__ == '__main__':
@@ -55,6 +55,5 @@ if __name__ == '__main__':
prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? after anything to get help[/]')
prnt(' add("https://example.com/some/new/url") [grey53]# call CLI methods from the shell[/]')
prnt(' snap = Snapshot.objects.filter(url__contains="https://example.com").last() [grey53]# query for individual snapshots[/]')
- prnt(' archivebox.plugins_extractor.wget.apps.WGET_EXTRACTOR.extract(snap.id) [grey53]# call an extractor directly[/]')
prnt(' snap.archiveresult_set.all() [grey53]# see extractor results[/]')
prnt(' bool(re.compile(CONFIG.URL_DENYLIST).search("https://example.com/abc.exe")) [grey53]# test out a config change[/]')
diff --git a/archivebox/misc/util.py b/archivebox/misc/util.py
index a856fe64..6195252e 100644
--- a/archivebox/misc/util.py
+++ b/archivebox/misc/util.py
@@ -5,7 +5,7 @@ import requests
import json as pyjson
import http.cookiejar
-from typing import List, Optional, Any
+from typing import List, Optional, Any, Callable
from pathlib import Path
from inspect import signature
from functools import wraps
@@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout
from base32_crockford import encode as base32_encode # type: ignore
from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding
try:
- import chardet
+ import chardet # type:ignore
detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"]
except ImportError:
detect_encoding = lambda rawdata: "utf-8"
-from archivebox.config import CONSTANTS
-from archivebox.config.common import ARCHIVING_CONFIG
+from archivebox.config.constants import CONSTANTS
from .logging import COLOR_DICT
@@ -126,6 +125,7 @@ def is_static_file(url: str):
def enforce_types(func):
"""
Enforce function arg and kwarg types at runtime using its python3 type hints
+    A simpler version of pydantic's @validate_call decorator
"""
# TODO: check return type as well
@@ -186,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str:
@enforce_types
-def parse_date(date: Any) -> Optional[datetime]:
+def parse_date(date: Any) -> datetime:
"""Parse unix timestamps, iso format, and human-readable strings"""
if date is None:
- return None
+ return None # type: ignore
if isinstance(date, datetime):
if date.tzinfo is None:
@@ -212,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]:
def download_url(url: str, timeout: int=None) -> str:
"""Download the contents of a remote url and return the text"""
+ from archivebox.config.common import ARCHIVING_CONFIG
+
timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
session = requests.Session()
@@ -241,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str:
return url.rsplit('/', 1)[-1]
@enforce_types
-def get_headers(url: str, timeout: int=None) -> str:
+def get_headers(url: str, timeout: int | None=None) -> str:
"""Download the contents of a remote url and return the headers"""
+ # TODO: get rid of this and use an abx pluggy hook instead
+
+ from archivebox.config.common import ARCHIVING_CONFIG
+
timeout = timeout or ARCHIVING_CONFIG.TIMEOUT
try:
@@ -283,6 +289,7 @@ def get_headers(url: str, timeout: int=None) -> str:
def ansi_to_html(text: str) -> str:
"""
Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html
+    Simple way to render colored CLI stdout/stderr as HTML; Textual/rich would probably do this better.
"""
TEMPLATE = '
'
@@ -306,13 +313,13 @@ def ansi_to_html(text: str) -> str:
@enforce_types
def dedupe(options: List[str]) -> List[str]:
"""
- Deduplicates the given options. Options that come later clobber earlier
- conflicting options.
+    Deduplicates the given CLI args by key=value. Options that come later override earlier ones.
"""
deduped = {}
for option in options:
- deduped[option.split('=')[0]] = option
+ key = option.split('=')[0]
+ deduped[key] = option
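+        # e.g. dedupe(['--foo=1', '--bar', '--foo=2']) -> ['--foo=2', '--bar']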
return list(deduped.values())
@@ -344,6 +351,9 @@ class ExtendedEncoder(pyjson.JSONEncoder):
elif cls_name in ('dict_items', 'dict_keys', 'dict_values'):
return tuple(obj)
+
+ elif isinstance(obj, Callable):
+ return str(obj)
return pyjson.JSONEncoder.default(self, obj)
diff --git a/archivebox/parsers/generic_jsonl.py b/archivebox/parsers/generic_jsonl.py
index 3af7356b..3948ba18 100644
--- a/archivebox/parsers/generic_jsonl.py
+++ b/archivebox/parsers/generic_jsonl.py
@@ -1,14 +1,11 @@
__package__ = 'archivebox.parsers'
import json
-
from typing import IO, Iterable
-from ..index.schema import Link
-from archivebox.misc.util import (
- enforce_types,
-)
+from archivebox.misc.util import enforce_types
+from ..index.schema import Link
from .generic_json import jsonObjectToLink
def parse_line(line: str):
diff --git a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py
index 9b88d958..52dbba17 100644
--- a/archivebox/parsers/pocket_api.py
+++ b/archivebox/parsers/pocket_api.py
@@ -6,8 +6,7 @@ import re
from typing import IO, Iterable, Optional
from configparser import ConfigParser
-from pocket import Pocket
-
+import archivebox
from archivebox.config import CONSTANTS
from archivebox.misc.util import enforce_types
from archivebox.misc.system import atomic_write
@@ -22,7 +21,7 @@ API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db'
_BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))')
-def get_pocket_articles(api: Pocket, since=None, page=0):
+def get_pocket_articles(api, since=None, page=0):
body, headers = api.get(
state='archive',
sort='oldest',
@@ -94,7 +93,9 @@ def should_parse_as_pocket_api(text: str) -> bool:
def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
"""Parse bookmarks from the Pocket API"""
- from archivebox.plugins_extractor.pocket.config import POCKET_CONFIG
+ from pocket import Pocket
+
+ FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG()
input_buffer.seek(0)
pattern = re.compile(r"^pocket:\/\/(\w+)")
@@ -102,7 +103,7 @@ def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
if should_parse_as_pocket_api(line):
username = pattern.search(line).group(1)
- api = Pocket(POCKET_CONFIG.POCKET_CONSUMER_KEY, POCKET_CONFIG.POCKET_ACCESS_TOKENS[username])
+ api = Pocket(FLAT_CONFIG.POCKET_CONSUMER_KEY, FLAT_CONFIG.POCKET_ACCESS_TOKENS[username])
api.last_since = None
for article in get_pocket_articles(api, since=read_since(username)):
diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py
index ad464537..20a792f3 100644
--- a/archivebox/parsers/readwise_reader_api.py
+++ b/archivebox/parsers/readwise_reader_api.py
@@ -8,9 +8,10 @@ from datetime import datetime
from typing import IO, Iterable, Optional
from configparser import ConfigParser
+import abx
+
from archivebox.misc.util import enforce_types
from archivebox.misc.system import atomic_write
-from archivebox.plugins_extractor.readwise.config import READWISE_CONFIG
from ..index.schema import Link
@@ -62,26 +63,30 @@ def link_from_article(article: dict, sources: list):
def write_cursor(username: str, since: str):
- if not READWISE_CONFIG.READWISE_DB_PATH.exists():
- atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "")
+ READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH
+
+ if not READWISE_DB_PATH.exists():
+ atomic_write(READWISE_DB_PATH, "")
since_file = ConfigParser()
since_file.optionxform = str
- since_file.read(READWISE_CONFIG.READWISE_DB_PATH)
+ since_file.read(READWISE_DB_PATH)
since_file[username] = {"since": since}
- with open(READWISE_CONFIG.READWISE_DB_PATH, "w+") as new:
+ with open(READWISE_DB_PATH, "w+") as new:
since_file.write(new)
def read_cursor(username: str) -> Optional[str]:
- if not READWISE_CONFIG.READWISE_DB_PATH.exists():
- atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "")
+ READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH
+
+ if not READWISE_DB_PATH.exists():
+ atomic_write(READWISE_DB_PATH, "")
config_file = ConfigParser()
config_file.optionxform = str
- config_file.read(READWISE_CONFIG.READWISE_DB_PATH)
+ config_file.read(READWISE_DB_PATH)
return config_file.get(username, "since", fallback=None)
@@ -97,12 +102,14 @@ def should_parse_as_readwise_reader_api(text: str) -> bool:
def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]:
"""Parse bookmarks from the Readwise Reader API"""
+ READWISE_READER_TOKENS = abx.pm.hook.get_CONFIG().READWISE_READER_TOKENS
+
input_buffer.seek(0)
pattern = re.compile(r"^readwise-reader:\/\/(\w+)")
for line in input_buffer:
if should_parse_as_readwise_reader_api(line):
username = pattern.search(line).group(1)
- api = ReadwiseReaderAPI(READWISE_CONFIG.READWISE_READER_TOKENS[username], cursor=read_cursor(username))
+ api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username))
for article in get_readwise_reader_articles(api):
yield link_from_article(article, sources=[line])
diff --git a/archivebox/pkgs/__init__.py b/archivebox/pkgs/__init__.py
new file mode 100644
index 00000000..c5f4cc82
--- /dev/null
+++ b/archivebox/pkgs/__init__.py
@@ -0,0 +1,39 @@
+import sys
+import importlib
+from pathlib import Path
+
+PKGS_DIR = Path(__file__).parent
+
+VENDORED_PKGS = [
+ 'abx',
+ # 'pydantic-pkgr',
+]
+
+# scan ./pkgs and add all dirs present to list of available VENDORED_PKGS
+for subdir in reversed(sorted(PKGS_DIR.iterdir())):
+ if subdir.is_dir() and subdir.name not in VENDORED_PKGS and not subdir.name.startswith('_'):
+ VENDORED_PKGS.append(subdir.name)
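+
+# e.g. with pkgs/abx-spec-config/ and pkgs/abx-plugin-curl/ on disk, this yields
+# VENDORED_PKGS = ['abx', 'abx-spec-config', 'abx-plugin-curl', ...] (reverse-sorted)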
+
+
+def load_vendored_pkgs():
+ """Add archivebox/vendor to sys.path and import all vendored libraries present within"""
+ if str(PKGS_DIR) not in sys.path:
+ sys.path.append(str(PKGS_DIR))
+
+ for pkg_name in VENDORED_PKGS:
+ pkg_dir = PKGS_DIR / pkg_name
+ assert pkg_dir.is_dir(), f'Required vendored pkg {pkg_name} could not be found in {pkg_dir}'
+
+ try:
+ lib = importlib.import_module(pkg_name)
+ # print(f"Successfully imported lib from environment {pkg_name}")
+ except ImportError:
+ sys.path.append(str(pkg_dir))
+ try:
+ lib = importlib.import_module(pkg_name)
+ # print(f"Successfully imported lib from vendored fallback {pkg_name}: {inspect.getfile(lib)}")
+ except ImportError as e:
+ print(f"Failed to import lib from environment or vendored fallback {pkg_name}: {e}", file=sys.stderr)
+ sys.exit(1)
+
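+# e.g. `import abx` resolves from the active environment if abx is pip-installed,
+# otherwise the copy vendored under archivebox/pkgs/abx/ is used as a fallback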
+
diff --git a/archivebox/plugins_pkg/__init__.py b/archivebox/pkgs/abx-plugin-archivedotorg/README.md
similarity index 100%
rename from archivebox/plugins_pkg/__init__.py
rename to archivebox/pkgs/abx-plugin-archivedotorg/README.md
diff --git a/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py
new file mode 100644
index 00000000..025d83bf
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py
@@ -0,0 +1,21 @@
+__label__ = 'Archive.org'
+__homepage__ = 'https://archive.org'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import ARCHIVEDOTORG_CONFIG
+
+ return {
+ 'ARCHIVEDOTORG_CONFIG': ARCHIVEDOTORG_CONFIG
+ }
+
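+# (ABX merges each plugin's get_CONFIG() dict into the CONFIGS / FLAT_CONFIG
+# views consumed via abx.pm.hook.get_CONFIGS() / get_FLAT_CONFIG() elsewhere)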
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# from .extractors import ARCHIVEDOTORG_EXTRACTOR
+#
+# return {
+# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR,
+# }
diff --git a/archivebox/extractors/archive_org.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py
similarity index 100%
rename from archivebox/extractors/archive_org.py
rename to archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py
diff --git a/archivebox/plugins_extractor/archivedotorg/config.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py
similarity index 54%
rename from archivebox/plugins_extractor/archivedotorg/config.py
rename to archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py
index bebb6c98..f4c146ab 100644
--- a/archivebox/plugins_extractor/archivedotorg/config.py
+++ b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py
@@ -1,7 +1,4 @@
-__package__ = 'plugins_extractor.archivedotorg'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class ArchivedotorgConfig(BaseConfigSet):
diff --git a/archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml b/archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml
new file mode 100644
index 00000000..36c91f3c
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-archivedotorg"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-plugin-curl>=2024.10.24",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_archivedotorg = "abx_plugin_archivedotorg"
diff --git a/archivebox/plugins_search/__init__.py b/archivebox/pkgs/abx-plugin-chrome/README.md
similarity index 100%
rename from archivebox/plugins_search/__init__.py
rename to archivebox/pkgs/abx-plugin-chrome/README.md
diff --git a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py
new file mode 100644
index 00000000..c300bd13
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py
@@ -0,0 +1,34 @@
+__label__ = 'Chrome'
+__author__ = 'ArchiveBox'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import CHROME_CONFIG
+
+ return {
+ 'CHROME_CONFIG': CHROME_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import CHROME_BINARY
+
+ return {
+ 'chrome': CHROME_BINARY,
+ }
+
+@abx.hookimpl
+def ready():
+ from .config import CHROME_CONFIG
+ CHROME_CONFIG.validate()
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# return {
+# 'pdf': PDF_EXTRACTOR,
+# 'screenshot': SCREENSHOT_EXTRACTOR,
+# 'dom': DOM_EXTRACTOR,
+# }
diff --git a/archivebox/plugins_extractor/chrome/binaries.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py
similarity index 82%
rename from archivebox/plugins_extractor/chrome/binaries.py
rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py
index 59573d93..f315c992 100644
--- a/archivebox/plugins_extractor/chrome/binaries.py
+++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py
@@ -1,5 +1,3 @@
-__package__ = 'plugins_extractor.chrome'
-
import os
import platform
from pathlib import Path
@@ -7,21 +5,22 @@ from typing import List, Optional
from pydantic import InstanceOf
from pydantic_pkgr import (
+ Binary,
BinProvider,
BinName,
BinaryOverrides,
bin_abspath,
)
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+import abx
-# Depends on Other Plugins:
-from archivebox.config.common import SHELL_CONFIG
-from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER
-from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER
+from abx_plugin_default_binproviders import apt, brew, env
+from abx_plugin_puppeteer.binproviders import PUPPETEER_BINPROVIDER
+from abx_plugin_playwright.binproviders import PLAYWRIGHT_BINPROVIDER
from .config import CHROME_CONFIG
+
CHROMIUM_BINARY_NAMES_LINUX = [
"chromium",
"chromium-browser",
@@ -48,12 +47,13 @@ CHROME_BINARY_NAMES_MACOS = [
]
CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
-APT_DEPENDENCIES = [
- 'apt-transport-https', 'at-spi2-common', 'chromium-browser',
+CHROME_APT_DEPENDENCIES = [
+ 'apt-transport-https', 'at-spi2-common',
'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei',
'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2',
'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1',
'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings',
+ 'chromium-browser',
]
@@ -80,7 +80,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path):
###################### Config ##########################
-class ChromeBinary(BaseBinary):
+class ChromeBinary(Binary):
name: BinName = CHROME_CONFIG.CHROME_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER, apt, brew]
@@ -95,7 +95,7 @@ class ChromeBinary(BaseBinary):
'packages': ['chromium'], # playwright install chromium
},
apt.name: {
- 'packages': APT_DEPENDENCIES,
+ 'packages': CHROME_APT_DEPENDENCIES,
},
brew.name: {
'packages': ['--cask', 'chromium'] if platform.system().lower() == 'darwin' else [],
@@ -104,10 +104,9 @@ class ChromeBinary(BaseBinary):
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
- from archivebox.config.common import STORAGE_CONFIG
- bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
+ bin_dir = bin_dir or abx.pm.hook.get_BIN_DIR()
- if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
+ if not (binary.abspath and os.path.isfile(binary.abspath)):
return
bin_dir.mkdir(parents=True, exist_ok=True)
@@ -121,7 +120,7 @@ class ChromeBinary(BaseBinary):
# otherwise on linux we can symlink directly to binary executable
symlink.unlink(missing_ok=True)
symlink.symlink_to(binary.abspath)
- except Exception as err:
+ except Exception:
# print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
# not actually needed, we can just run without it
pass
@@ -132,14 +131,17 @@ class ChromeBinary(BaseBinary):
Cleans up any state or runtime files that chrome leaves behind when killed by
a timeout or other error
"""
- lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
-
- if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK):
- lock_file.unlink()
+ try:
+ linux_lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
+ linux_lock_file.unlink(missing_ok=True)
+ except Exception:
+ pass
if CHROME_CONFIG.CHROME_USER_DATA_DIR:
- if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK):
- lock_file.unlink()
+ try:
+ (CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock').unlink(missing_ok=True)
+ except Exception:
+ pass
diff --git a/archivebox/plugins_extractor/chrome/config.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py
similarity index 86%
rename from archivebox/plugins_extractor/chrome/config.py
rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py
index a28c530f..6883cdd1 100644
--- a/archivebox/plugins_extractor/chrome/config.py
+++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py
@@ -1,5 +1,3 @@
-__package__ = 'plugins_extractor.chrome'
-
import os
from pathlib import Path
from typing import List, Optional
@@ -7,8 +5,8 @@ from typing import List, Optional
from pydantic import Field
from pydantic_pkgr import bin_abspath
-from abx.archivebox.base_configset import BaseConfigSet
-from abx.archivebox.base_binary import env
+from abx_spec_config.base_configset import BaseConfigSet
+from abx_plugin_default_binproviders import env
from archivebox.config import CONSTANTS
from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG
@@ -81,15 +79,16 @@ class ChromeConfig(BaseConfigSet):
# Chrome Binary
CHROME_BINARY: str = Field(default='chrome')
CHROME_DEFAULT_ARGS: List[str] = Field(default=[
- '--virtual-time-budget=15000',
- '--disable-features=DarkMode',
- "--run-all-compositor-stages-before-draw",
- "--hide-scrollbars",
- "--autoplay-policy=no-user-gesture-required",
- "--no-first-run",
- "--use-fake-ui-for-media-stream",
- "--use-fake-device-for-media-stream",
- "--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'",
+ "--no-first-run", # dont show any first run ui / setup prompts
+        '--virtual-time-budget=15000',                  # accelerate any animations on the page by skipping ahead 15s into the future
+ '--disable-features=DarkMode', # disable dark mode for archiving
+ "--run-all-compositor-stages-before-draw", # dont draw partially rendered content, wait until everything is ready
+ "--hide-scrollbars", # hide scrollbars to prevent layout shift / scrollbar visible in screenshots
+ "--autoplay-policy=no-user-gesture-required", # allow media autoplay without user gesture (e.g. on mobile)
+ "--use-fake-ui-for-media-stream", # provide fake camera if site tries to request camera access
+ "--use-fake-device-for-media-stream", # provide fake camera if site tries to request camera access
+ "--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'", # ignore chrome updates
+ "--force-gpu-mem-available-mb=4096", # allows for longer full page screenshots https://github.com/puppeteer/puppeteer/issues/5530
])
CHROME_EXTRA_ARGS: List[str] = Field(default=[])
@@ -196,6 +195,7 @@ class ChromeConfig(BaseConfigSet):
cmd_args.append('--user-data-dir={}'.format(options.CHROME_USER_DATA_DIR))
cmd_args.append('--profile-directory={}'.format(options.CHROME_PROFILE_NAME or 'Default'))
+ # if CHROME_USER_DATA_DIR is set but folder is empty, create a new profile inside it
if not os.path.isfile(options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME / 'Preferences'):
STDERR.print(f'[green] + creating new Chrome profile in: {pretty_path(options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME)}[/green]')
cmd_args.remove('--no-first-run')
diff --git a/archivebox/extractors/dom.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py
similarity index 100%
rename from archivebox/extractors/dom.py
rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py
diff --git a/archivebox/extractors/pdf.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py
similarity index 100%
rename from archivebox/extractors/pdf.py
rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py
diff --git a/archivebox/extractors/screenshot.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py
similarity index 100%
rename from archivebox/extractors/screenshot.py
rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py
diff --git a/archivebox/pkgs/abx-plugin-chrome/pyproject.toml b/archivebox/pkgs/abx-plugin-chrome/pyproject.toml
new file mode 100644
index 00000000..da26078d
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-chrome/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-chrome"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_chrome = "abx_plugin_chrome"
diff --git a/archivebox/pkgs/abx-plugin-curl/README.md b/archivebox/pkgs/abx-plugin-curl/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py
new file mode 100644
index 00000000..7988ef5e
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py
@@ -0,0 +1,18 @@
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import CURL_CONFIG
+
+ return {
+ 'curl': CURL_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import CURL_BINARY
+
+ return {
+ 'curl': CURL_BINARY,
+ }
diff --git a/archivebox/plugins_extractor/curl/binaries.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py
similarity index 57%
rename from archivebox/plugins_extractor/curl/binaries.py
rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py
index 41ff9616..32628248 100644
--- a/archivebox/plugins_extractor/curl/binaries.py
+++ b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py
@@ -1,17 +1,17 @@
-__package__ = 'plugins_extractor.curl'
+__package__ = 'abx_plugin_curl'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
from .config import CURL_CONFIG
-class CurlBinary(BaseBinary):
+class CurlBinary(Binary):
name: BinName = CURL_CONFIG.CURL_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/archivebox/plugins_extractor/curl/config.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py
similarity index 90%
rename from archivebox/plugins_extractor/curl/config.py
rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py
index 14996f66..69f4a637 100644
--- a/archivebox/plugins_extractor/curl/config.py
+++ b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_extractor.curl'
+__package__ = 'abx_plugin_curl'
from typing import List, Optional
from pathlib import Path
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
diff --git a/archivebox/extractors/headers.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py
similarity index 100%
rename from archivebox/extractors/headers.py
rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py
diff --git a/archivebox/pkgs/abx-plugin-curl/pyproject.toml b/archivebox/pkgs/abx-plugin-curl/pyproject.toml
new file mode 100644
index 00000000..f3c6ad55
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-curl/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-curl"
+version = "2024.10.24"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_curl = "abx_plugin_curl"
diff --git a/archivebox/pkgs/abx-plugin-default-binproviders/README.md b/archivebox/pkgs/abx-plugin-default-binproviders/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py b/archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
new file mode 100644
index 00000000..58dbdac9
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py
@@ -0,0 +1,23 @@
+
+import abx
+
+from typing import Dict
+
+from pydantic_pkgr import (
+ AptProvider,
+ BrewProvider,
+ EnvProvider,
+ BinProvider,
+)
+apt = APT_BINPROVIDER = AptProvider()
+brew = BREW_BINPROVIDER = BrewProvider()
+env = ENV_BINPROVIDER = EnvProvider()
+
+
+@abx.hookimpl(tryfirst=True)
+def get_BINPROVIDERS() -> Dict[str, BinProvider]:
+ return {
+ 'apt': APT_BINPROVIDER,
+ 'brew': BREW_BINPROVIDER,
+ 'env': ENV_BINPROVIDER,
+ }
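+
+# Illustrative usage sketch (assumes pydantic_pkgr's BinProvider.load_or_install
+# API; not part of this module):
+#
+# from abx_plugin_default_binproviders import env
+#
+# curl_bin = env.load_or_install(bin_name='curl') # find curl on $PATH
+# print(curl_bin.abspath, curl_bin.version)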
diff --git a/archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml b/archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml
new file mode 100644
index 00000000..3f8fec96
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-default-binproviders"
+version = "2024.10.24"
+description = "Default BinProviders for ABX (apt, brew, env)"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_default_binproviders = "abx_plugin_default_binproviders"
diff --git a/archivebox/pkgs/abx-plugin-favicon/README.md b/archivebox/pkgs/abx-plugin-favicon/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py
new file mode 100644
index 00000000..75004e3d
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py
@@ -0,0 +1,29 @@
+__label__ = 'Favicon'
+__version__ = '2024.10.24'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/archivebox'
+__dependencies__ = [
+ 'abx>=0.1.0',
+ 'abx-spec-config>=0.1.0',
+ 'abx-plugin-curl>=2024.10.24',
+]
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import FAVICON_CONFIG
+
+ return {
+ 'FAVICON_CONFIG': FAVICON_CONFIG
+ }
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# from .extractors import FAVICON_EXTRACTOR
+
+# return {
+# 'favicon': FAVICON_EXTRACTOR,
+# }
diff --git a/archivebox/plugins_extractor/favicon/config.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py
similarity index 64%
rename from archivebox/plugins_extractor/favicon/config.py
rename to archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py
index 6073ef87..8b97d758 100644
--- a/archivebox/plugins_extractor/favicon/config.py
+++ b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py
@@ -1,7 +1,4 @@
-__package__ = 'plugins_extractor.favicon'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class FaviconConfig(BaseConfigSet):
diff --git a/archivebox/extractors/favicon.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py
similarity index 100%
rename from archivebox/extractors/favicon.py
rename to archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py
diff --git a/archivebox/pkgs/abx-plugin-favicon/pyproject.toml b/archivebox/pkgs/abx-plugin-favicon/pyproject.toml
new file mode 100644
index 00000000..cad10890
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-favicon/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-favicon"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-plugin-curl>=2024.10.28",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_favicon = "abx_plugin_favicon"
diff --git a/archivebox/pkgs/abx-plugin-git/README.md b/archivebox/pkgs/abx-plugin-git/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py
new file mode 100644
index 00000000..61c04b9c
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py
@@ -0,0 +1,29 @@
+__package__ = 'abx_plugin_git'
+__label__ = 'Git'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import GIT_CONFIG
+
+ return {
+ 'GIT_CONFIG': GIT_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import GIT_BINARY
+
+ return {
+ 'git': GIT_BINARY,
+ }
+
+# TODO: re-enable once GitExtractor is ported to the new plugin system;
+# ./extractors.py is still entirely commented out, so importing GIT_EXTRACTOR
+# here would raise ImportError when the hook is called.
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# from .extractors import GIT_EXTRACTOR
+
+# return {
+# 'git': GIT_EXTRACTOR,
+# }
diff --git a/archivebox/plugins_extractor/git/binaries.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py
similarity index 57%
rename from archivebox/plugins_extractor/git/binaries.py
rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py
index 8d990769..f352fd99 100644
--- a/archivebox/plugins_extractor/git/binaries.py
+++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py
@@ -1,17 +1,17 @@
-__package__ = 'plugins_extractor.git'
+__package__ = 'abx_plugin_git'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
from .config import GIT_CONFIG
-class GitBinary(BaseBinary):
+class GitBinary(Binary):
name: BinName = GIT_CONFIG.GIT_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/archivebox/plugins_extractor/git/config.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py
similarity index 87%
rename from archivebox/plugins_extractor/git/config.py
rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py
index 3d890d62..d8a9ca17 100644
--- a/archivebox/plugins_extractor/git/config.py
+++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py
@@ -1,10 +1,10 @@
-__package__ = 'plugins_extractor.git'
+__package__ = 'abx_plugin_git'
from typing import List
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
diff --git a/archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py
new file mode 100644
index 00000000..4863d031
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py
@@ -0,0 +1,15 @@
+__package__ = 'abx_plugin_git'
+
+# from pathlib import Path
+
+# from .binaries import GIT_BINARY
+
+
+# class GitExtractor(BaseExtractor):
+# name: ExtractorName = 'git'
+# binary: str = GIT_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path | None:
+# return snapshot.as_link() / 'git'
+
+# GIT_EXTRACTOR = GitExtractor()
diff --git a/archivebox/extractors/git.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py
similarity index 95%
rename from archivebox/extractors/git.py
rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py
index 9ac71d3e..128ba0e7 100644
--- a/archivebox/extractors/git.py
+++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py
@@ -16,8 +16,8 @@ from archivebox.misc.util import (
from ..logging_util import TimedProgress
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
-from archivebox.plugins_extractor.git.config import GIT_CONFIG
-from archivebox.plugins_extractor.git.binaries import GIT_BINARY
+from abx_plugin_git.config import GIT_CONFIG
+from abx_plugin_git.binaries import GIT_BINARY
def get_output_path():
diff --git a/archivebox/pkgs/abx-plugin-git/pyproject.toml b/archivebox/pkgs/abx-plugin-git/pyproject.toml
new file mode 100644
index 00000000..384599b7
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-git/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-git"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "abx-plugin-default-binproviders>=2024.10.24",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_git = "abx_plugin_git"
diff --git a/archivebox/pkgs/abx-plugin-htmltotext/README.md b/archivebox/pkgs/abx-plugin-htmltotext/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py
new file mode 100644
index 00000000..ebbc6800
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py
@@ -0,0 +1,22 @@
+__package__ = 'abx_plugin_htmltotext'
+__label__ = 'HTML-to-Text'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import HTMLTOTEXT_CONFIG
+
+ return {
+ 'HTMLTOTEXT_CONFIG': HTMLTOTEXT_CONFIG
+ }
+
+
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# from .extractors import HTMLTOTEXT_EXTRACTOR
+
+# return {
+# 'htmltotext': HTMLTOTEXT_EXTRACTOR,
+# }
diff --git a/archivebox/plugins_extractor/htmltotext/config.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py
similarity index 52%
rename from archivebox/plugins_extractor/htmltotext/config.py
rename to archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py
index 31b9bff5..bd3aabc6 100644
--- a/archivebox/plugins_extractor/htmltotext/config.py
+++ b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py
@@ -1,7 +1,4 @@
-__package__ = 'plugins_extractor.htmltotext'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class HtmltotextConfig(BaseConfigSet):
diff --git a/archivebox/extractors/htmltotext.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py
similarity index 100%
rename from archivebox/extractors/htmltotext.py
rename to archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py
diff --git a/archivebox/pkgs/abx-plugin-htmltotext/pyproject.toml b/archivebox/pkgs/abx-plugin-htmltotext/pyproject.toml
new file mode 100644
index 00000000..46ebaa46
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-htmltotext/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-plugin-htmltotext"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_htmltotext = "abx_plugin_htmltotext"
diff --git a/archivebox/pkgs/abx-plugin-ldap-auth/README.md b/archivebox/pkgs/abx-plugin-ldap-auth/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/plugins_auth/ldap/__init__.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py
similarity index 68%
rename from archivebox/plugins_auth/ldap/__init__.py
rename to archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py
index 6ba43b90..d4ac6431 100644
--- a/archivebox/plugins_auth/ldap/__init__.py
+++ b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py
@@ -1,36 +1,15 @@
-__package__ = 'plugins_auth.ldap'
-__id__ = 'ldap'
+__package__ = 'abx_plugin_ldap_auth'
__label__ = 'LDAP'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/django-auth-ldap/django-auth-ldap'
-__dependencies__ = ['pip']
import abx
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-
-
@abx.hookimpl
def get_CONFIG():
from .config import LDAP_CONFIG
return {
- __id__: LDAP_CONFIG
+ 'LDAP_CONFIG': LDAP_CONFIG
}
@abx.hookimpl
@@ -48,12 +27,12 @@ def create_superuser_from_ldap_user(sender, user=None, ldap_user=None, **kwargs)
ArchiveBox requires staff/superuser status to view the admin at all, so we must create a user
+ set staff and superuser when LDAP authenticates a new person.
"""
- from django.conf import settings
+ from .config import LDAP_CONFIG
if user is None:
return # not authenticated at all
- if not user.id and settings.CONFIGS.ldap.LDAP_CREATE_SUPERUSER:
+ if not user.id and LDAP_CONFIG.LDAP_CREATE_SUPERUSER:
user.is_superuser = True # authenticated via LDAP, but user is not set up in DB yet
user.is_staff = True
@@ -69,9 +48,7 @@ def ready():
LDAP_CONFIG.validate()
- from django.conf import settings
-
- if settings.CONFIGS.ldap.LDAP_ENABLED:
+ if LDAP_CONFIG.LDAP_ENABLED:
# tell django-auth-ldap to call our function when a user is authenticated via LDAP
import django_auth_ldap.backend
django_auth_ldap.backend.populate_user.connect(create_superuser_from_ldap_user)
diff --git a/archivebox/plugins_auth/ldap/binaries.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py
similarity index 78%
rename from archivebox/plugins_auth/ldap/binaries.py
rename to archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py
index cc932183..8ea4776d 100644
--- a/archivebox/plugins_auth/ldap/binaries.py
+++ b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py
@@ -1,5 +1,4 @@
-__package__ = 'plugins_auth.ldap'
-
+__package__ = 'abx_plugin_ldap_auth'
import inspect
@@ -7,12 +6,10 @@ from typing import List
from pathlib import Path
from pydantic import InstanceOf
-from pydantic_pkgr import BinaryOverrides, SemVer
+from pydantic_pkgr import BinaryOverrides, SemVer, Binary, BinProvider
-
-from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, apt
-
-from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES
+from abx_plugin_default_binproviders import apt
+from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES
from .config import get_ldap_lib
@@ -39,10 +36,10 @@ def get_LDAP_LIB_version():
return LDAP_LIB and SemVer(LDAP_LIB.__version__)
-class LdapBinary(BaseBinary):
+class LdapBinary(Binary):
name: str = 'ldap'
description: str = 'LDAP Authentication'
- binproviders_supported: List[InstanceOf[BaseBinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt]
+ binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt]
overrides: BinaryOverrides = {
LIB_PIP_BINPROVIDER.name: {
diff --git a/archivebox/plugins_auth/ldap/config.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py
similarity index 96%
rename from archivebox/plugins_auth/ldap/config.py
rename to archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py
index 2094dc68..451c9da8 100644
--- a/archivebox/plugins_auth/ldap/config.py
+++ b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_auth.ldap'
+__package__ = 'abx_plugin_ldap_auth'
import sys
from typing import Dict, List, Optional
-from pydantic import Field, model_validator, computed_field
+from pydantic import Field, computed_field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
LDAP_LIB = None
LDAP_SEARCH = None
diff --git a/archivebox/pkgs/abx-plugin-ldap-auth/pyproject.toml b/archivebox/pkgs/abx-plugin-ldap-auth/pyproject.toml
new file mode 100644
index 00000000..a89d0cbc
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-ldap-auth/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "abx-plugin-ldap-auth"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-django>=0.1.0",
+]
+
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+
+[project.entry-points.abx]
+abx_plugin_ldap_auth = "abx_plugin_ldap_auth"
diff --git a/archivebox/pkgs/abx-plugin-mercury/README.md b/archivebox/pkgs/abx-plugin-mercury/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/__init__.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/__init__.py
new file mode 100644
index 00000000..7b6fcfd6
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/__init__.py
@@ -0,0 +1,29 @@
+__package__ = 'abx_plugin_mercury'
+__label__ = 'Postlight Parser'
+__homepage__ = 'https://github.com/postlight/mercury-parser'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import MERCURY_CONFIG
+
+ return {
+ 'MERCURY_CONFIG': MERCURY_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import MERCURY_BINARY
+
+ return {
+ 'mercury': MERCURY_BINARY,
+ }
+
+# TODO: re-enable once MercuryExtractor is ported to the new plugin system;
+# ./extractors.py is still entirely commented out, so importing
+# MERCURY_EXTRACTOR here would raise ImportError when the hook is called.
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# from .extractors import MERCURY_EXTRACTOR
+
+# return {
+# 'mercury': MERCURY_EXTRACTOR,
+# }
diff --git a/archivebox/plugins_extractor/mercury/binaries.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py
similarity index 78%
rename from archivebox/plugins_extractor/mercury/binaries.py
rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py
index b07055fd..f015a7ca 100644
--- a/archivebox/plugins_extractor/mercury/binaries.py
+++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py
@@ -1,18 +1,18 @@
-__package__ = 'plugins_extractor.mercury'
+__package__ = 'abx_plugin_mercury'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath
+from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
-from abx.archivebox.base_binary import BaseBinary, env
+from abx_plugin_default_binproviders import env
-from archivebox.plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
+from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
from .config import MERCURY_CONFIG
-class MercuryBinary(BaseBinary):
+class MercuryBinary(Binary):
name: BinName = MERCURY_CONFIG.MERCURY_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
diff --git a/archivebox/plugins_extractor/mercury/config.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/config.py
similarity index 90%
rename from archivebox/plugins_extractor/mercury/config.py
rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/config.py
index 49c92b73..00fa82a4 100644
--- a/archivebox/plugins_extractor/mercury/config.py
+++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/config.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_extractor.mercury'
+__package__ = 'abx_plugin_mercury'
from typing import List, Optional
from pathlib import Path
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG
diff --git a/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py
new file mode 100644
index 00000000..36a17f3a
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py
@@ -0,0 +1,17 @@
+__package__ = 'abx_plugin_mercury'
+
+# from pathlib import Path
+
+# from .binaries import MERCURY_BINARY
+
+
+
+# class MercuryExtractor(BaseExtractor):
+# name: ExtractorName = 'mercury'
+# binary: str = MERCURY_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path | None:
+# return snapshot.link_dir / 'mercury' / 'content.html'
+
+
+# MERCURY_EXTRACTOR = MercuryExtractor()
diff --git a/archivebox/extractors/mercury.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py
similarity index 100%
rename from archivebox/extractors/mercury.py
rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py
diff --git a/archivebox/pkgs/abx-plugin-mercury/pyproject.toml b/archivebox/pkgs/abx-plugin-mercury/pyproject.toml
new file mode 100644
index 00000000..c740008b
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-mercury/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-plugin-mercury"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_mercury = "abx_plugin_mercury"
diff --git a/archivebox/pkgs/abx-plugin-npm/README.md b/archivebox/pkgs/abx-plugin-npm/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/plugins_pkg/npm/__init__.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/__init__.py
similarity index 58%
rename from archivebox/plugins_pkg/npm/__init__.py
rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/__init__.py
index 921d42e4..d1f56f35 100644
--- a/archivebox/plugins_pkg/npm/__init__.py
+++ b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/__init__.py
@@ -1,32 +1,15 @@
-__package__ = 'plugins_pkg.npm'
-__version__ = '2024.10.14'
-__id__ = 'npm'
-__label__ = 'npm'
+__label__ = 'NPM'
__author__ = 'ArchiveBox'
__homepage__ = 'https://www.npmjs.com/'
import abx
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- }
- }
-
@abx.hookimpl
def get_CONFIG():
from .config import NPM_CONFIG
-
return {
- __id__: NPM_CONFIG,
+ 'NPM_CONFIG': NPM_CONFIG,
}
@abx.hookimpl
diff --git a/archivebox/plugins_pkg/npm/binaries.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py
similarity index 72%
rename from archivebox/plugins_pkg/npm/binaries.py
rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py
index dd9e6214..4f44fc4a 100644
--- a/archivebox/plugins_pkg/npm/binaries.py
+++ b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py
@@ -4,14 +4,19 @@ __package__ = 'plugins_pkg.npm'
from typing import List
from pydantic import InstanceOf
+from benedict import benedict
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides
+from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides
+
+from abx_plugin_default_binproviders import get_BINPROVIDERS
+
+DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS())
+env = DEFAULT_BINPROVIDERS.env
+apt = DEFAULT_BINPROVIDERS.apt
+brew = DEFAULT_BINPROVIDERS.brew
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
-
-
-class NodeBinary(BaseBinary):
+class NodeBinary(Binary):
name: BinName = 'node'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
@@ -23,7 +28,7 @@ class NodeBinary(BaseBinary):
NODE_BINARY = NodeBinary()
-class NpmBinary(BaseBinary):
+class NpmBinary(Binary):
name: BinName = 'npm'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
@@ -35,7 +40,7 @@ class NpmBinary(BaseBinary):
NPM_BINARY = NpmBinary()
-class NpxBinary(BaseBinary):
+class NpxBinary(Binary):
name: BinName = 'npx'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py
new file mode 100644
index 00000000..dd56e3a9
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py
@@ -0,0 +1,38 @@
+import os
+from pathlib import Path
+from typing import Optional
+
+from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
+
+import abx
+
+DEFAULT_LIB_NPM_DIR = Path('/usr/local/share/abx/npm')
+
+OLD_NODE_BIN_PATH = Path(os.getcwd()) / 'node_modules' / '.bin'
+NEW_NODE_BIN_PATH = DEFAULT_LIB_NPM_DIR / 'node_modules' / '.bin'
+
+
+class SystemNpmBinProvider(NpmProvider):
+ name: BinProviderName = "sys_npm"
+
+ npm_prefix: Optional[Path] = None
+
+
+class LibNpmBinProvider(NpmProvider):
+ name: BinProviderName = "lib_npm"
+ PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
+
+ npm_prefix: Optional[Path] = DEFAULT_LIB_NPM_DIR
+
+ def setup(self) -> None:
+ # update paths from config at runtime
+ LIB_DIR = abx.pm.hook.get_LIB_DIR()
+ self.npm_prefix = LIB_DIR / 'npm'
+ self.PATH = f'{LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
+
+ super().setup()
+
+
+SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
+LIB_NPM_BINPROVIDER = LibNpmBinProvider()
+npm = LIB_NPM_BINPROVIDER
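+
+# Illustrative usage sketch (assumes pydantic_pkgr's BinProvider.load_or_install API):
+#
+# pkg_bin = LIB_NPM_BINPROVIDER.load_or_install(bin_name='puppeteer')
+# # installs under <npm_prefix>/node_modules/.bin and returns the loaded binary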
diff --git a/archivebox/plugins_pkg/npm/config.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/config.py
similarity index 79%
rename from archivebox/plugins_pkg/npm/config.py
rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/config.py
index f69cfdd2..b937ed27 100644
--- a/archivebox/plugins_pkg/npm/config.py
+++ b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/config.py
@@ -1,7 +1,4 @@
-__package__ = 'plugins_pkg.npm'
-
-
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config import BaseConfigSet
###################### Config ##########################
diff --git a/archivebox/pkgs/abx-plugin-npm/pyproject.toml b/archivebox/pkgs/abx-plugin-npm/pyproject.toml
new file mode 100644
index 00000000..1371b2c4
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-npm/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "abx-plugin-npm"
+version = "2024.10.24"
+description = "NPM binary provider plugin for ABX"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-plugin-default-binproviders>=2024.10.24",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_npm = "abx_plugin_npm"
diff --git a/archivebox/pkgs/abx-plugin-pip/README.md b/archivebox/pkgs/abx-plugin-pip/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/plugins_pkg/pip/.plugin_order b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order
similarity index 100%
rename from archivebox/plugins_pkg/pip/.plugin_order
rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order
diff --git a/archivebox/plugins_pkg/pip/__init__.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py
similarity index 62%
rename from archivebox/plugins_pkg/pip/__init__.py
rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py
index c1be27b1..eebcdb5b 100644
--- a/archivebox/plugins_pkg/pip/__init__.py
+++ b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py
@@ -1,33 +1,18 @@
-__package__ = 'plugins_pkg.pip'
-__label__ = 'pip'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/pypa/pip'
+__package__ = 'abx_plugin_pip'
+__label__ = 'PIP'
import abx
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'pip': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- }
- }
-
@abx.hookimpl
def get_CONFIG():
from .config import PIP_CONFIG
return {
- 'pip': PIP_CONFIG
+ 'PIP_CONFIG': PIP_CONFIG
}
-@abx.hookimpl
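+# tryfirst=True asks the plugin manager to call this hookimpl before other
+# plugins' get_BINARIES implementations, so these core binaries are collected first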
+@abx.hookimpl(tryfirst=True)
def get_BINARIES():
from .binaries import ARCHIVEBOX_BINARY, PYTHON_BINARY, DJANGO_BINARY, SQLITE_BINARY, PIP_BINARY, PIPX_BINARY
diff --git a/archivebox/plugins_pkg/pip/binaries.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py
similarity index 84%
rename from archivebox/plugins_pkg/pip/binaries.py
rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py
index 3e451cfe..18e5f34f 100644
--- a/archivebox/plugins_pkg/pip/binaries.py
+++ b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py
@@ -1,4 +1,4 @@
-__package__ = 'plugins_pkg.pip'
+__package__ = 'abx_plugin_pip'
import sys
from pathlib import Path
@@ -9,29 +9,30 @@ from pydantic import InstanceOf, Field, model_validator
import django
import django.db.backends.sqlite3.base
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, SemVer
+from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer
-from archivebox import VERSION
-from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
-
-from archivebox.misc.logging import hint
-
-from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
+from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew
###################### Config ##########################
+def get_archivebox_version():
+ try:
+ from archivebox import VERSION
+ return VERSION
+ except Exception:
+ return None
-class ArchiveboxBinary(BaseBinary):
+class ArchiveboxBinary(Binary):
name: BinName = 'archivebox'
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
overrides: BinaryOverrides = {
- VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION},
- SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION},
- apt.name: {'packages': [], 'version': VERSION},
- brew.name: {'packages': [], 'version': VERSION},
+ VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version},
+ SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version},
+ apt.name: {'packages': [], 'version': get_archivebox_version},
+ brew.name: {'packages': [], 'version': get_archivebox_version},
}
# @validate_call
@@ -45,7 +46,7 @@ class ArchiveboxBinary(BaseBinary):
ARCHIVEBOX_BINARY = ArchiveboxBinary()
-class PythonBinary(BaseBinary):
+class PythonBinary(Binary):
name: BinName = 'python'
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
@@ -71,9 +72,9 @@ LOADED_SQLITE_PATH = Path(django.db.backends.sqlite3.base.__file__)
LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version)
LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))
-class SqliteBinary(BaseBinary):
+class SqliteBinary(Binary):
name: BinName = 'sqlite'
- binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
+ binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {
"abspath": LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None,
@@ -93,10 +94,10 @@ class SqliteBinary(BaseBinary):
cursor.execute('SELECT JSON(\'{"a": "b"}\')')
except django_sqlite3.OperationalError as exc:
print(f'[red][X] Your SQLite3 version is missing the required JSON1 extension: {exc}[/red]')
- hint([
- 'Upgrade your Python version or install the extension manually:',
- 'https://code.djangoproject.com/wiki/JSON1Extension'
- ])
+ print(
+ '[violet]Hint:[/violet] Upgrade your Python version or install the extension manually:\n' +
+ ' https://code.djangoproject.com/wiki/JSON1Extension\n'
+ )
return self
# @validate_call
@@ -114,10 +115,10 @@ LOADED_DJANGO_PATH = Path(django.__file__)
LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3])
LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv and VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))
-class DjangoBinary(BaseBinary):
+class DjangoBinary(Binary):
name: BinName = 'django'
- binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
+ binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {
"abspath": LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None,
@@ -139,7 +140,7 @@ class DjangoBinary(BaseBinary):
DJANGO_BINARY = DjangoBinary()
-class PipBinary(BaseBinary):
+class PipBinary(Binary):
name: BinName = "pip"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
@@ -154,7 +155,7 @@ class PipBinary(BaseBinary):
PIP_BINARY = PipBinary()
-class PipxBinary(BaseBinary):
+class PipxBinary(Binary):
name: BinName = "pipx"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
diff --git a/archivebox/plugins_pkg/pip/binproviders.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py
similarity index 76%
rename from archivebox/plugins_pkg/pip/binproviders.py
rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py
index e51dc780..c29798b0 100644
--- a/archivebox/plugins_pkg/pip/binproviders.py
+++ b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py
@@ -1,21 +1,26 @@
-__package__ = 'plugins_pkg.pip'
-
import os
import sys
import site
from pathlib import Path
from typing import Optional
+from benedict import benedict
+
from pydantic_pkgr import PipProvider, BinName, BinProviderName
-from archivebox.config import CONSTANTS
+import abx
-from abx.archivebox.base_binary import BaseBinProvider
+from abx_plugin_default_binproviders import get_BINPROVIDERS
+
+DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS())
+env = DEFAULT_BINPROVIDERS.env
+apt = DEFAULT_BINPROVIDERS.apt
+brew = DEFAULT_BINPROVIDERS.brew
###################### Config ##########################
-class SystemPipBinProvider(PipProvider, BaseBinProvider):
+class SystemPipBinProvider(PipProvider):
name: BinProviderName = "sys_pip"
INSTALLER_BIN: BinName = "pip"
@@ -25,7 +30,7 @@ class SystemPipBinProvider(PipProvider, BaseBinProvider):
# never modify system pip packages
return 'refusing to install packages globally with system pip, use a venv instead'
-class SystemPipxBinProvider(PipProvider, BaseBinProvider):
+class SystemPipxBinProvider(PipProvider):
name: BinProviderName = "pipx"
INSTALLER_BIN: BinName = "pipx"
@@ -34,7 +39,7 @@ class SystemPipxBinProvider(PipProvider, BaseBinProvider):
IS_INSIDE_VENV = sys.prefix != sys.base_prefix
-class VenvPipBinProvider(PipProvider, BaseBinProvider):
+class VenvPipBinProvider(PipProvider):
name: BinProviderName = "venv_pip"
INSTALLER_BIN: BinName = "pip"
@@ -45,18 +50,16 @@ class VenvPipBinProvider(PipProvider, BaseBinProvider):
return None
-class LibPipBinProvider(PipProvider, BaseBinProvider):
+class LibPipBinProvider(PipProvider):
name: BinProviderName = "lib_pip"
INSTALLER_BIN: BinName = "pip"
- pip_venv: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'pip' / 'venv'
+ pip_venv: Optional[Path] = Path('/usr/local/share/abx/pip/venv')
def setup(self) -> None:
- # update paths from config if they arent the default
- from archivebox.config.common import STORAGE_CONFIG
- if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
- self.pip_venv = STORAGE_CONFIG.LIB_DIR / 'pip' / 'venv'
-
+ # update venv path to match most up-to-date LIB_DIR based on runtime config
+ LIB_DIR = abx.pm.hook.get_LIB_DIR()
+ self.pip_venv = LIB_DIR / 'pip' / 'venv'
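+ # resolving get_LIB_DIR() through the plugin manager lets the venv location
+ # follow runtime config without importing archivebox.config from this package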
super().setup()
SYS_PIP_BINPROVIDER = SystemPipBinProvider()
diff --git a/archivebox/plugins_pkg/pip/config.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/config.py
similarity index 86%
rename from archivebox/plugins_pkg/pip/config.py
rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/config.py
index 26cf0f8e..f7464810 100644
--- a/archivebox/plugins_pkg/pip/config.py
+++ b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/config.py
@@ -3,7 +3,7 @@ __package__ = 'pip'
from typing import List, Optional
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
class PipDependencyConfigs(BaseConfigSet):
diff --git a/archivebox/pkgs/abx-plugin-pip/pyproject.toml b/archivebox/pkgs/abx-plugin-pip/pyproject.toml
new file mode 100644
index 00000000..03f88d0b
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-pip/pyproject.toml
@@ -0,0 +1,22 @@
+[project]
+name = "abx-plugin-pip"
+version = "2024.10.24"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "abx-plugin-default-binproviders>=2024.10.24",
+ "django>=5.0.0",
+]
+
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_pip = "abx_plugin_pip"
diff --git a/archivebox/pkgs/abx-plugin-playwright/README.md b/archivebox/pkgs/abx-plugin-playwright/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/plugins_pkg/playwright/__init__.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/__init__.py
similarity index 52%
rename from archivebox/plugins_pkg/playwright/__init__.py
rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/__init__.py
index 0f66f42c..6d3ed715 100644
--- a/archivebox/plugins_pkg/playwright/__init__.py
+++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/__init__.py
@@ -1,30 +1,14 @@
-__package__ = 'plugins_pkg.playwright'
-__label__ = 'playwright'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
+__label__ = 'Playwright'
__homepage__ = 'https://github.com/microsoft/playwright-python'
import abx
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'playwright': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- }
- }
-
@abx.hookimpl
def get_CONFIG():
from .config import PLAYWRIGHT_CONFIG
-
return {
- 'playwright': PLAYWRIGHT_CONFIG
+ 'PLAYWRIGHT_CONFIG': PLAYWRIGHT_CONFIG
}
@abx.hookimpl
diff --git a/archivebox/plugins_pkg/playwright/binaries.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py
similarity index 54%
rename from archivebox/plugins_pkg/playwright/binaries.py
rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py
index 0ef63646..4b77d9d4 100644
--- a/archivebox/plugins_pkg/playwright/binaries.py
+++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py
@@ -1,20 +1,18 @@
-__package__ = 'plugins_pkg.playwright'
+__package__ = 'abx_plugin_playwright'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinName, BinProvider
+from pydantic_pkgr import BinName, BinProvider, Binary
-from abx.archivebox.base_binary import BaseBinary, env
-from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER
+from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
+from abx_plugin_default_binproviders import env
from .config import PLAYWRIGHT_CONFIG
-
-
-class PlaywrightBinary(BaseBinary):
+class PlaywrightBinary(Binary):
name: BinName = PLAYWRIGHT_CONFIG.PLAYWRIGHT_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env]
diff --git a/archivebox/plugins_pkg/playwright/binproviders.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py
similarity index 79%
rename from archivebox/plugins_pkg/playwright/binproviders.py
rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py
index cb8d35cb..972cb11a 100644
--- a/archivebox/plugins_pkg/playwright/binproviders.py
+++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py
@@ -1,13 +1,15 @@
-__package__ = 'plugins_pkg.playwright'
+__package__ = 'abx_plugin_playwright'
import os
+import shutil
import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
-from pydantic import computed_field, Field
+from pydantic import Field
from pydantic_pkgr import (
BinName,
+ BinProvider,
BinProviderName,
BinProviderOverrides,
InstallArgs,
@@ -18,25 +20,31 @@ from pydantic_pkgr import (
DEFAULT_ENV_PATH,
)
-from archivebox.config import CONSTANTS
+import abx
-from abx.archivebox.base_binary import BaseBinProvider, env
+from abx_plugin_default_binproviders import env
-from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER
from .binaries import PLAYWRIGHT_BINARY
-
+USER_PLAYWRIGHT_CACHE_DIR: str | None = os.environ.get("PLAYWRIGHT_BROWSERS_PATH", None)
MACOS_PLAYWRIGHT_CACHE_DIR: Path = Path("~/Library/Caches/ms-playwright")
LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright")
+PLAYWRIGHT_CACHE_DIR: Path = Path(USER_PLAYWRIGHT_CACHE_DIR) if USER_PLAYWRIGHT_CACHE_DIR else (
+ MACOS_PLAYWRIGHT_CACHE_DIR.expanduser()
+ if OPERATING_SYSTEM == "darwin" else
+ LINUX_PLAYWRIGHT_CACHE_DIR.expanduser()
+)
-class PlaywrightBinProvider(BaseBinProvider):
+
+class PlaywrightBinProvider(BinProvider):
name: BinProviderName = "playwright"
INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
- PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
+ PATH: PATHStr = f"{Path('/usr/share/abx') / 'bin'}:{DEFAULT_ENV_PATH}"
+ playwright_browsers_dir: Path = PLAYWRIGHT_CACHE_DIR
playwright_install_args: List[str] = ["install"]
packages_handler: BinProviderOverrides = Field(default={
@@ -45,39 +53,20 @@ class PlaywrightBinProvider(BaseBinProvider):
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
- @computed_field
@property
def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None:
try:
return PLAYWRIGHT_BINARY.load().abspath
- except Exception as e:
+ except Exception:
return None
- @property
- def playwright_browsers_dir(self) -> Path:
- # The directory where playwright stores browsers can be overridden with
- # the "PLAYWRIGHT_BROWSERS_PATH" environment variable; if it's present
- # and a directory, we should use that. See the playwright documentation
- # for more details:
- # https://playwright.dev/docs/browsers#managing-browser-binaries
- dir_path = os.environ.get("PLAYWRIGHT_BROWSERS_PATH")
- if dir_path and os.path.isdir(dir_path):
- return Path(dir_path)
-
- # Otherwise return the default path based on the operating system.
- return (
- MACOS_PLAYWRIGHT_CACHE_DIR.expanduser()
- if OPERATING_SYSTEM == "darwin" else
- LINUX_PLAYWRIGHT_CACHE_DIR.expanduser()
- )
-
def setup(self) -> None:
- # update paths from config if they arent the default
- from archivebox.config.common import STORAGE_CONFIG
- if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
- self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
+ # update paths from config at runtime
+ LIB_DIR = abx.pm.hook.get_LIB_DIR()
+
+ self.PATH = f"{LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
- assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"
+ assert shutil.which('pip'), "Pip bin provider not initialized"
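+ # a plain shutil.which() check replaces the old SYS_PIP_BINPROVIDER assert, so
+ # this plugin no longer imports the pip plugin just for this sanity check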
if self.playwright_browsers_dir:
self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True)
diff --git a/archivebox/plugins_pkg/playwright/config.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/config.py
similarity index 59%
rename from archivebox/plugins_pkg/playwright/config.py
rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/config.py
index 23f22efc..0c7c6a50 100644
--- a/archivebox/plugins_pkg/playwright/config.py
+++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/config.py
@@ -1,7 +1,4 @@
-__package__ = 'playwright'
-
-from abx.archivebox.base_configset import BaseConfigSet
-
+from abx_spec_config import BaseConfigSet
class PlaywrightConfigs(BaseConfigSet):
PLAYWRIGHT_BINARY: str = 'playwright'
diff --git a/archivebox/pkgs/abx-plugin-playwright/pyproject.toml b/archivebox/pkgs/abx-plugin-playwright/pyproject.toml
new file mode 100644
index 00000000..0ad0d995
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-playwright/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "abx-plugin-playwright"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "pydantic>=2.4.2",
+ "pydantic-pkgr>=0.5.4",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "abx-spec-config>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_playwright = "abx_plugin_playwright"
diff --git a/archivebox/pkgs/abx-plugin-pocket/README.md b/archivebox/pkgs/abx-plugin-pocket/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/__init__.py b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/__init__.py
new file mode 100644
index 00000000..09e5dc8f
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/__init__.py
@@ -0,0 +1,18 @@
+__package__ = 'abx_plugin_pocket'
+__label__ = 'Pocket'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import POCKET_CONFIG
+
+ return {
+ 'POCKET_CONFIG': POCKET_CONFIG
+ }
+
+@abx.hookimpl
+def ready():
+ from .config import POCKET_CONFIG
+ POCKET_CONFIG.validate()
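+
+# Note: ready() runs after the host app has registered all plugins, so it is a
+# reasonable place to validate that required config (e.g. API keys) is present.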
diff --git a/archivebox/plugins_extractor/pocket/config.py b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py
similarity index 62%
rename from archivebox/plugins_extractor/pocket/config.py
rename to archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py
index 7866a1f6..31f691b2 100644
--- a/archivebox/plugins_extractor/pocket/config.py
+++ b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py
@@ -1,15 +1,12 @@
-__package__ = 'plugins_extractor.pocket'
-
from typing import Dict
-
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config import BaseConfigSet
class PocketConfig(BaseConfigSet):
POCKET_CONSUMER_KEY: str | None = Field(default=None)
- POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default=lambda: {}) # {<username>: <access_token>, ...}
+ POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default_factory=dict) # {<username>: <access_token>, ...}
POCKET_CONFIG = PocketConfig()
diff --git a/archivebox/pkgs/abx-plugin-pocket/pyproject.toml b/archivebox/pkgs/abx-plugin-pocket/pyproject.toml
new file mode 100644
index 00000000..999fa098
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-pocket/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-pocket"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "pocket>=0.3.6",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_pocket = "abx_plugin_pocket"
diff --git a/archivebox/pkgs/abx-plugin-puppeteer/README.md b/archivebox/pkgs/abx-plugin-puppeteer/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py
new file mode 100644
index 00000000..1ee876d6
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py
@@ -0,0 +1,30 @@
+__package__ = 'abx_plugin_puppeteer'
+__label__ = 'Puppeteer'
+__homepage__ = 'https://github.com/puppeteer/puppeteer'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import PUPPETEER_CONFIG
+
+ return {
+ 'PUPPETEER_CONFIG': PUPPETEER_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import PUPPETEER_BINARY
+
+ return {
+ 'puppeteer': PUPPETEER_BINARY,
+ }
+
+@abx.hookimpl
+def get_BINPROVIDERS():
+ from .binproviders import PUPPETEER_BINPROVIDER
+
+ return {
+ 'puppeteer': PUPPETEER_BINPROVIDER,
+ }
diff --git a/archivebox/plugins_pkg/puppeteer/binaries.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py
similarity index 54%
rename from archivebox/plugins_pkg/puppeteer/binaries.py
rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py
index 7e592bba..8afd484f 100644
--- a/archivebox/plugins_pkg/puppeteer/binaries.py
+++ b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py
@@ -1,20 +1,20 @@
-__package__ = 'plugins_pkg.puppeteer'
+__package__ = 'abx_plugin_puppeteer'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env
+from abx_plugin_default_binproviders import env
-from plugins_pkg.npm.binproviders import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
+from abx_plugin_npm.binproviders import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER
###################### Config ##########################
-class PuppeteerBinary(BaseBinary):
+class PuppeteerBinary(Binary):
name: BinName = "puppeteer"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
diff --git a/archivebox/plugins_pkg/puppeteer/binproviders.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py
similarity index 91%
rename from archivebox/plugins_pkg/puppeteer/binproviders.py
rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py
index 2ef0eb7a..e7b697bd 100644
--- a/archivebox/plugins_pkg/puppeteer/binproviders.py
+++ b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py
@@ -1,5 +1,3 @@
-__package__ = 'plugins_pkg.puppeteer'
-
import os
import platform
from pathlib import Path
@@ -7,6 +5,7 @@ from typing import List, Optional, Dict, ClassVar
from pydantic import Field
from pydantic_pkgr import (
+ BinProvider,
BinName,
BinProviderName,
BinProviderOverrides,
@@ -15,15 +14,15 @@ from pydantic_pkgr import (
HostBinPath,
)
+import abx
+
from archivebox.config import CONSTANTS
from archivebox.config.permissions import ARCHIVEBOX_USER
-from abx.archivebox.base_binary import BaseBinProvider
-
-from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER
+from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER
-class PuppeteerBinProvider(BaseBinProvider):
+class PuppeteerBinProvider(BinProvider):
name: BinProviderName = "puppeteer"
INSTALLER_BIN: BinName = "npx"
@@ -42,10 +41,12 @@ class PuppeteerBinProvider(BaseBinProvider):
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
def setup(self) -> None:
- # update paths from config
- from archivebox.config.common import STORAGE_CONFIG
- self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers'
- self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin')
+ # update paths from config, don't do this lazily because we dont want to import archivebox.config.common at import-time
+ # we want to avoid depending on archivebox from abx code if at all possible
+ LIB_DIR = abx.pm.hook.get_LIB_DIR()
+ BIN_DIR = abx.pm.hook.get_BIN_DIR()
+ self.puppeteer_browsers_dir = LIB_DIR / 'browsers'
+ self.PATH = str(BIN_DIR)
assert SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized"
diff --git a/archivebox/plugins_pkg/puppeteer/config.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py
similarity index 79%
rename from archivebox/plugins_pkg/puppeteer/config.py
rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py
index b76d0779..f09e7062 100644
--- a/archivebox/plugins_pkg/puppeteer/config.py
+++ b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py
@@ -1,7 +1,7 @@
-__package__ = 'plugins_pkg.puppeteer'
+__package__ = 'abx_plugin_puppeteer'
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
###################### Config ##########################
diff --git a/archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml b/archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml
new file mode 100644
index 00000000..2633b481
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-puppeteer"
+version = "2024.10.28"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_puppeteer = "abx_plugin_puppeteer"
diff --git a/archivebox/pkgs/abx-plugin-readability/README.md b/archivebox/pkgs/abx-plugin-readability/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/__init__.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/__init__.py
new file mode 100644
index 00000000..cb7d35af
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/__init__.py
@@ -0,0 +1,30 @@
+__package__ = 'abx_plugin_readability'
+__label__ = 'Readability'
+__homepage__ = 'https://github.com/ArchiveBox/readability-extractor'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import READABILITY_CONFIG
+
+ return {
+ 'READABILITY_CONFIG': READABILITY_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import READABILITY_BINARY
+
+ return {
+ 'readability': READABILITY_BINARY,
+ }
+
+# TODO: re-enable once ReadabilityExtractor is ported to the new plugin system;
+# ./extractors.py is still entirely commented out, so importing
+# READABILITY_EXTRACTOR here would raise ImportError when the hook is called.
+# @abx.hookimpl
+# def get_EXTRACTORS():
+# from .extractors import READABILITY_EXTRACTOR
+
+# return {
+# 'readability': READABILITY_EXTRACTOR,
+# }
diff --git a/archivebox/plugins_extractor/readability/binaries.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py
similarity index 69%
rename from archivebox/plugins_extractor/readability/binaries.py
rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py
index 43343924..65ecf57c 100644
--- a/archivebox/plugins_extractor/readability/binaries.py
+++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py
@@ -1,20 +1,19 @@
-__package__ = 'plugins_extractor.readability'
+__package__ = 'abx_plugin_readability'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
+from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName
-from abx.archivebox.base_binary import BaseBinary, env
-
-from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
+from abx_plugin_default_binproviders import env
+from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
from .config import READABILITY_CONFIG
READABILITY_PACKAGE_NAME = 'github:ArchiveBox/readability-extractor'
-class ReadabilityBinary(BaseBinary):
+class ReadabilityBinary(Binary):
name: BinName = READABILITY_CONFIG.READABILITY_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
diff --git a/archivebox/plugins_extractor/readability/config.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/config.py
similarity index 83%
rename from archivebox/plugins_extractor/readability/config.py
rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/config.py
index 8066d56c..726295fe 100644
--- a/archivebox/plugins_extractor/readability/config.py
+++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/config.py
@@ -1,8 +1,6 @@
-__package__ = 'plugins_extractor.readability'
-
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
diff --git a/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py
new file mode 100644
index 00000000..64d712ed
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py
@@ -0,0 +1,19 @@
+# __package__ = 'abx_plugin_readability'
+
+# from pathlib import Path
+
+# from pydantic_pkgr import BinName
+
+
+# from .binaries import READABILITY_BINARY
+
+
+# class ReadabilityExtractor(BaseExtractor):
+# name: str = 'readability'
+# binary: BinName = READABILITY_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path:
+# return Path(snapshot.link_dir) / 'readability' / 'content.html'
+
+
+# READABILITY_EXTRACTOR = ReadabilityExtractor()
diff --git a/archivebox/extractors/readability.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py
similarity index 100%
rename from archivebox/extractors/readability.py
rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py
diff --git a/archivebox/pkgs/abx-plugin-readability/pyproject.toml b/archivebox/pkgs/abx-plugin-readability/pyproject.toml
new file mode 100644
index 00000000..59a2db64
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-readability/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-plugin-readability"
+version = "2024.10.28"
+description = "Readability article-text extractor plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_readability = "abx_plugin_readability"
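+
+# (Assumed behavior: the [project.entry-points.abx] table above is how the abx
+# loader auto-discovers this plugin once it is pip-installed, e.g. via Python's
+# importlib.metadata.entry_points(group='abx').)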
diff --git a/archivebox/pkgs/abx-plugin-readwise/README.md b/archivebox/pkgs/abx-plugin-readwise/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py b/archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py
new file mode 100644
index 00000000..ea31cd14
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py
@@ -0,0 +1,35 @@
+__package__ = 'abx_plugin_readwise'
+__id__ = 'abx_plugin_readwise'
+__label__ = 'Readwise API'
+__version__ = '2024.10.27'
+__author__ = 'ArchiveBox'
+__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/pkgs/abx-plugin-readwise'
+__dependencies__ = []
+
+import abx
+
+from typing import Dict
+from pathlib import Path
+
+from pydantic import Field
+
+from abx_spec_config.base_configset import BaseConfigSet
+
+SOURCES_DIR = abx.pm.hook.get_FLAT_CONFIG()['SOURCES_DIR']
+
+
+class ReadwiseConfig(BaseConfigSet):
+ READWISE_DB_PATH: Path = Field(default=SOURCES_DIR / "readwise_reader_api.db")
+    READWISE_READER_TOKENS: Dict[str, str] = Field(default_factory=dict)     # {<user_id>: <api_token>, ...}
+
+
+@abx.hookimpl
+def get_CONFIG():
+ return {
+ __id__: ReadwiseConfig()
+ }
+
+@abx.hookimpl
+def ready():
+    READWISE_CONFIG = abx.pm.hook.get_CONFIGS()[__id__]
+ READWISE_CONFIG.validate()
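+
+# Hypothetical usage sketch: reading a validated value back out of the merged
+# config via the abx-spec-config hooks defined elsewhere in this PR:
+#
+#   FLAT_CONFIG = abx.pm.hook.get_FLAT_CONFIG()
+#   db_path = FLAT_CONFIG['READWISE_DB_PATH']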
diff --git a/archivebox/pkgs/abx-plugin-readwise/pyproject.toml b/archivebox/pkgs/abx-plugin-readwise/pyproject.toml
new file mode 100644
index 00000000..c85d489f
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-readwise/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-readwise"
+version = "2024.10.28"
+description = "Readwise API Extractor"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_readwise = "abx_plugin_readwise"
+
diff --git a/archivebox/pkgs/abx-plugin-ripgrep-search/README.md b/archivebox/pkgs/abx-plugin-ripgrep-search/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py
new file mode 100644
index 00000000..91347523
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py
@@ -0,0 +1,31 @@
+__package__ = 'abx_plugin_ripgrep_search'
+__label__ = 'Ripgrep Search'
+__homepage__ = 'https://github.com/BurntSushi/ripgrep'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import RIPGREP_CONFIG
+
+ return {
+ 'RIPGREP_CONFIG': RIPGREP_CONFIG
+ }
+
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import RIPGREP_BINARY
+
+ return {
+ 'ripgrep': RIPGREP_BINARY
+ }
+
+
+@abx.hookimpl
+def get_SEARCHBACKENDS():
+ from .searchbackend import RIPGREP_SEARCH_BACKEND
+
+ return {
+ 'ripgrep': RIPGREP_SEARCH_BACKEND,
+ }
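+
+# Hypothetical usage sketch (backend method names assumed, per abx-spec-searchbackend):
+#
+#   backends = abx.as_dict(abx.pm.hook.get_SEARCHBACKENDS())
+#   snapshot_ids = backends['ripgrep'].search('some query text')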
diff --git a/archivebox/plugins_search/ripgrep/binaries.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
similarity index 65%
rename from archivebox/plugins_search/ripgrep/binaries.py
rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
index 710a1ef0..ef9217ad 100644
--- a/archivebox/plugins_search/ripgrep/binaries.py
+++ b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py
@@ -1,17 +1,17 @@
-__package__ = 'plugins_search.ripgrep'
+__package__ = 'abx_plugin_ripgrep_search'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
+from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
from .config import RIPGREP_CONFIG
-class RipgrepBinary(BaseBinary):
+class RipgrepBinary(Binary):
name: BinName = RIPGREP_CONFIG.RIPGREP_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/archivebox/plugins_search/ripgrep/config.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py
similarity index 89%
rename from archivebox/plugins_search/ripgrep/config.py
rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py
index 726c21e8..e0fd3b28 100644
--- a/archivebox/plugins_search/ripgrep/config.py
+++ b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_search.ripgrep'
+__package__ = 'abx_plugin_ripgrep_search'
from pathlib import Path
from typing import List
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config import CONSTANTS
from archivebox.config.common import SEARCH_BACKEND_CONFIG
diff --git a/archivebox/plugins_search/ripgrep/searchbackend.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py
similarity index 93%
rename from archivebox/plugins_search/ripgrep/searchbackend.py
rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py
index 3c30af85..ed3965ba 100644
--- a/archivebox/plugins_search/ripgrep/searchbackend.py
+++ b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py
@@ -1,11 +1,11 @@
-__package__ = 'plugins_search.ripgrep'
+__package__ = 'abx_plugin_ripgrep_search'
import re
import subprocess
from typing import List, Iterable
-from abx.archivebox.base_searchbackend import BaseSearchBackend
+from abx_spec_searchbackend import BaseSearchBackend
from .binaries import RIPGREP_BINARY
from .config import RIPGREP_CONFIG
diff --git a/archivebox/pkgs/abx-plugin-ripgrep-search/pyproject.toml b/archivebox/pkgs/abx-plugin-ripgrep-search/pyproject.toml
new file mode 100644
index 00000000..67245c48
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-ripgrep-search/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-ripgrep-search"
+version = "2024.10.28"
+description = "Ripgrep full-text search plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-searchbackend>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_ripgrep_search = "abx_plugin_ripgrep_search"
diff --git a/archivebox/pkgs/abx-plugin-singlefile/README.md b/archivebox/pkgs/abx-plugin-singlefile/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py
new file mode 100644
index 00000000..be6dcd02
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py
@@ -0,0 +1,35 @@
+__package__ = 'abx_plugin_singlefile'
+__label__ = 'SingleFile'
+__homepage__ = 'https://github.com/gildas-lormeau/SingleFile'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import SINGLEFILE_CONFIG
+
+ return {
+ 'SINGLEFILE_CONFIG': SINGLEFILE_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import SINGLEFILE_BINARY
+
+ return {
+ 'singlefile': SINGLEFILE_BINARY,
+ }
+
+@abx.hookimpl
+def get_EXTRACTORS():
+ from .extractors import SINGLEFILE_EXTRACTOR
+
+ return {
+ 'singlefile': SINGLEFILE_EXTRACTOR,
+ }
+
+@abx.hookimpl
+def get_INSTALLED_APPS():
+ # needed to load ./models.py
+ return [__package__]
diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py
new file mode 100644
index 00000000..d928d0fd
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py
@@ -0,0 +1,27 @@
+__package__ = 'abx_plugin_singlefile'
+
+from typing import ClassVar
+from django.db.models import QuerySet
+from django.utils.functional import classproperty
+
+from actors.actor import ActorType
+
+from .models import SinglefileResult
+
+
+class SinglefileActor(ActorType[SinglefileResult]):
+ CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
+    CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "singlefile"'
+ CLAIM_SET: ClassVar[str] = 'status = "started"'
+
+ @classproperty
+ def QUERYSET(cls) -> QuerySet:
+ return SinglefileResult.objects.filter(status='queued')
+
+ def tick(self, obj: SinglefileResult):
+ print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
+ updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1
+ if not updated:
+ raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
+ obj.refresh_from_db()
+ obj.save()
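+
+# Hypothetical orchestrator-side sketch (method names per the ActorType notes in
+# abx_spec_archivebox/states.py; exact signatures may differ):
+#
+#   queue = SinglefileActor.get_queue()
+#   running = SinglefileActor.get_running_actors()
+#   to_spawn = SinglefileActor.get_actors_to_spawn(queue, running)
+#   SinglefileActor.spawn_actors(to_spawn, block=False, double_fork=False)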
diff --git a/archivebox/plugins_extractor/singlefile/binaries.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py
similarity index 84%
rename from archivebox/plugins_extractor/singlefile/binaries.py
rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py
index 0c8a1bab..7af784a3 100644
--- a/archivebox/plugins_extractor/singlefile/binaries.py
+++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py
@@ -1,13 +1,10 @@
-__package__ = 'plugins_extractor.singlefile'
-
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, bin_abspath
+from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
-from abx.archivebox.base_binary import BaseBinary, env
-
-from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
+from abx_plugin_default_binproviders import env
+from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER
from .config import SINGLEFILE_CONFIG
@@ -16,7 +13,7 @@ SINGLEFILE_MIN_VERSION = '1.1.54'
SINGLEFILE_MAX_VERSION = '1.1.60'
-class SinglefileBinary(BaseBinary):
+class SinglefileBinary(Binary):
name: BinName = SINGLEFILE_CONFIG.SINGLEFILE_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env]
diff --git a/archivebox/plugins_extractor/singlefile/config.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/config.py
similarity index 88%
rename from archivebox/plugins_extractor/singlefile/config.py
rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/config.py
index 7d27031e..0d2164ba 100644
--- a/archivebox/plugins_extractor/singlefile/config.py
+++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/config.py
@@ -1,11 +1,9 @@
-__package__ = 'plugins_extractor.singlefile'
-
from pathlib import Path
from typing import List, Optional
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py
new file mode 100644
index 00000000..07b674ac
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py
@@ -0,0 +1,18 @@
+__package__ = 'abx_plugin_singlefile'
+
+# from pathlib import Path
+
+# from pydantic_pkgr import BinName
+
+# from .binaries import SINGLEFILE_BINARY
+
+
+# class SinglefileExtractor(BaseExtractor):
+# name: str = 'singlefile'
+# binary: BinName = SINGLEFILE_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path:
+# return Path(snapshot.link_dir) / 'singlefile.html'
+
+
+# SINGLEFILE_EXTRACTOR = SinglefileExtractor()
diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/plugins_extractor/singlefile/models.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py
similarity index 100%
rename from archivebox/plugins_extractor/singlefile/models.py
rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py
diff --git a/archivebox/extractors/singlefile.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py
similarity index 100%
rename from archivebox/extractors/singlefile.py
rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py
diff --git a/archivebox/pkgs/abx-plugin-singlefile/pyproject.toml b/archivebox/pkgs/abx-plugin-singlefile/pyproject.toml
new file mode 100644
index 00000000..7cecd40a
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-singlefile/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-singlefile"
+version = "2024.10.28"
+description = "SingleFile HTML snapshot extractor plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_singlefile = "abx_plugin_singlefile"
diff --git a/archivebox/pkgs/abx-plugin-sonic-search/README.md b/archivebox/pkgs/abx-plugin-sonic-search/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/plugins_search/sonic/__init__.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py
similarity index 53%
rename from archivebox/plugins_search/sonic/__init__.py
rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py
index a899679b..1a92a8d2 100644
--- a/archivebox/plugins_search/sonic/__init__.py
+++ b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py
@@ -1,32 +1,16 @@
-__package__ = 'plugins_search.sonic'
-__label__ = 'sonic'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
+__package__ = 'abx_plugin_sonic_search'
+__label__ = 'Sonic Search'
__homepage__ = 'https://github.com/valeriansaliou/sonic'
-__dependencies__ = []
import abx
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'sonic': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
@abx.hookimpl
def get_CONFIG():
from .config import SONIC_CONFIG
return {
- 'sonic': SONIC_CONFIG
+ 'SONIC_CONFIG': SONIC_CONFIG
}
diff --git a/archivebox/plugins_search/sonic/binaries.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py
similarity index 80%
rename from archivebox/plugins_search/sonic/binaries.py
rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py
index eab987c5..2e8fb536 100644
--- a/archivebox/plugins_search/sonic/binaries.py
+++ b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py
@@ -1,16 +1,16 @@
-__package__ = 'plugins_search.sonic'
+__package__ = 'abx_plugin_sonic_search'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinaryOverrides, BinName
+from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, brew
+from abx_plugin_default_binproviders import brew, env
from .config import SONIC_CONFIG
-class SonicBinary(BaseBinary):
+class SonicBinary(Binary):
name: BinName = SONIC_CONFIG.SONIC_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [brew, env] # TODO: add cargo
diff --git a/archivebox/plugins_search/sonic/config.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py
similarity index 93%
rename from archivebox/plugins_search/sonic/config.py
rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py
index d54ed568..97cc7b3a 100644
--- a/archivebox/plugins_search/sonic/config.py
+++ b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py
@@ -1,10 +1,10 @@
-__package__ = 'plugins_search.sonic'
+__package__ = 'abx_plugin_sonic_search'
import sys
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import SEARCH_BACKEND_CONFIG
diff --git a/archivebox/plugins_search/sonic/searchbackend.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py
similarity index 97%
rename from archivebox/plugins_search/sonic/searchbackend.py
rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py
index 1662e5b2..a63a0132 100644
--- a/archivebox/plugins_search/sonic/searchbackend.py
+++ b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py
@@ -2,7 +2,7 @@ __package__ = 'plugins_search.sonic'
from typing import List, Generator, cast
-from abx.archivebox.base_searchbackend import BaseSearchBackend
+from abx_spec_searchbackend import BaseSearchBackend
from .config import SONIC_CONFIG, SONIC_LIB
diff --git a/archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml b/archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml
new file mode 100644
index 00000000..b6551b52
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "abx-plugin-sonic-search"
+version = "2024.10.28"
+description = "Sonic full-text search plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "abx-spec-searchbackend>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_sonic_search = "abx_plugin_sonic_search"
diff --git a/archivebox/pkgs/abx-plugin-sqlitefts-search/README.md b/archivebox/pkgs/abx-plugin-sqlitefts-search/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py
new file mode 100644
index 00000000..5d5ed6de
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py
@@ -0,0 +1,21 @@
+__package__ = 'abx_plugin_sqlitefts_search'
+__label__ = 'SQLiteFTS Search'
+
+import abx
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import SQLITEFTS_CONFIG
+
+ return {
+ 'SQLITEFTS_CONFIG': SQLITEFTS_CONFIG
+ }
+
+
+@abx.hookimpl
+def get_SEARCHBACKENDS():
+ from .searchbackend import SQLITEFTS_SEARCH_BACKEND
+
+ return {
+ 'sqlitefts': SQLITEFTS_SEARCH_BACKEND,
+ }
diff --git a/archivebox/plugins_search/sqlitefts/config.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py
similarity index 96%
rename from archivebox/plugins_search/sqlitefts/config.py
rename to archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py
index 5690dc6c..789ff114 100644
--- a/archivebox/plugins_search/sqlitefts/config.py
+++ b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py
@@ -1,4 +1,4 @@
-__package__ = 'plugins_search.sqlitefts'
+__package__ = 'abx_plugin_sqlitefts_search'
import sys
import sqlite3
@@ -8,7 +8,7 @@ from django.core.exceptions import ImproperlyConfigured
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import SEARCH_BACKEND_CONFIG
diff --git a/archivebox/plugins_search/sqlitefts/searchbackend.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py
similarity index 98%
rename from archivebox/plugins_search/sqlitefts/searchbackend.py
rename to archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py
index 630bdd4c..2ae7c9cf 100644
--- a/archivebox/plugins_search/sqlitefts/searchbackend.py
+++ b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py
@@ -1,10 +1,10 @@
-__package__ = 'plugins_search.sqlitefts'
+__package__ = 'abx_plugin_sqlitefts_search'
import codecs
import sqlite3
from typing import List, Iterable
-from abx.archivebox.base_searchbackend import BaseSearchBackend
+from abx_spec_searchbackend import BaseSearchBackend
from .config import SQLITEFTS_CONFIG
diff --git a/archivebox/pkgs/abx-plugin-sqlitefts-search/pyproject.toml b/archivebox/pkgs/abx-plugin-sqlitefts-search/pyproject.toml
new file mode 100644
index 00000000..abc6181a
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-sqlitefts-search/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-sqlitefts-search"
+version = "2024.10.28"
+description = "SQLite FTS full-text search plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-searchbackend>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_sqlitefts_search = "abx_plugin_sqlitefts_search"
diff --git a/archivebox/pkgs/abx-plugin-title/README.md b/archivebox/pkgs/abx-plugin-title/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py
new file mode 100644
index 00000000..d3e5cac5
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py
@@ -0,0 +1,9 @@
+import abx
+
+# @abx.hookimpl
+# def get_CONFIG():
+# from .config import TITLE_EXTRACTOR_CONFIG
+
+# return {
+# 'title_extractor': TITLE_EXTRACTOR_CONFIG
+# }
diff --git a/archivebox/extractors/title.py b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py
similarity index 97%
rename from archivebox/extractors/title.py
rename to archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py
index ceefb699..a8ef52cf 100644
--- a/archivebox/extractors/title.py
+++ b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py
@@ -11,8 +11,8 @@ from archivebox.misc.util import (
htmldecode,
dedupe,
)
-from archivebox.plugins_extractor.curl.config import CURL_CONFIG
-from archivebox.plugins_extractor.curl.binaries import CURL_BINARY
+from abx_plugin_curl.config import CURL_CONFIG
+from abx_plugin_curl.binaries import CURL_BINARY
from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
from ..logging_util import TimedProgress
diff --git a/archivebox/pkgs/abx-plugin-title/pyproject.toml b/archivebox/pkgs/abx-plugin-title/pyproject.toml
new file mode 100644
index 00000000..a9737b3a
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-title/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-title"
+version = "2024.10.27"
+description = "Title Extractor"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-plugin-curl>=2024.10.28",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_title = "abx_plugin_title"
diff --git a/archivebox/pkgs/abx-plugin-wget/README.md b/archivebox/pkgs/abx-plugin-wget/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/__init__.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/__init__.py
new file mode 100644
index 00000000..a32987ee
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/__init__.py
@@ -0,0 +1,35 @@
+__package__ = 'abx_plugin_wget'
+__label__ = 'WGET'
+
+import abx
+
+
+@abx.hookimpl
+def get_CONFIG():
+ from .config import WGET_CONFIG
+
+ return {
+ 'WGET_CONFIG': WGET_CONFIG
+ }
+
+@abx.hookimpl
+def get_BINARIES():
+ from .binaries import WGET_BINARY
+
+ return {
+ 'wget': WGET_BINARY,
+ }
+
+@abx.hookimpl
+def get_EXTRACTORS():
+ from .extractors import WGET_EXTRACTOR, WARC_EXTRACTOR
+
+ return {
+ 'wget': WGET_EXTRACTOR,
+ 'warc': WARC_EXTRACTOR,
+ }
+
+@abx.hookimpl
+def ready():
+ from .config import WGET_CONFIG
+ WGET_CONFIG.validate()
diff --git a/archivebox/plugins_extractor/wget/binaries.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py
similarity index 57%
rename from archivebox/plugins_extractor/wget/binaries.py
rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py
index 6198beac..39cbe111 100644
--- a/archivebox/plugins_extractor/wget/binaries.py
+++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py
@@ -1,17 +1,17 @@
-__package__ = 'plugins_extractor.wget'
+__package__ = 'abx_plugin_wget'
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName
+from pydantic_pkgr import BinProvider, BinName, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
+from abx_plugin_default_binproviders import apt, brew, env
from .config import WGET_CONFIG
-class WgetBinary(BaseBinary):
+class WgetBinary(Binary):
name: BinName = WGET_CONFIG.WGET_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
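+
+# Hypothetical usage sketch (pydantic_pkgr Binary API, assuming load_or_install()
+# as provided upstream):
+#
+#   wget = WgetBinary().load_or_install()   # resolves via apt, then brew, then $PATH env
+#   print(wget.abspath, wget.version)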
diff --git a/archivebox/plugins_extractor/wget/config.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/config.py
similarity index 96%
rename from archivebox/plugins_extractor/wget/config.py
rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/config.py
index 12edf672..1dfd1b07 100644
--- a/archivebox/plugins_extractor/wget/config.py
+++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/config.py
@@ -1,12 +1,10 @@
-__package__ = 'plugins_extractor.wget'
-
import subprocess
from typing import List, Optional
from pathlib import Path
from pydantic import Field
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG
from archivebox.misc.logging import STDERR
diff --git a/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py
new file mode 100644
index 00000000..4d4d0243
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py
@@ -0,0 +1,35 @@
+__package__ = 'abx_plugin_wget'
+
+# from pathlib import Path
+
+# from pydantic_pkgr import BinName
+
+# from .binaries import WGET_BINARY
+# from .wget_util import wget_output_path
+
+# class WgetExtractor(BaseExtractor):
+# name: ExtractorName = 'wget'
+# binary: BinName = WGET_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path | None:
+# wget_index_path = wget_output_path(snapshot.as_link())
+# if wget_index_path:
+# return Path(wget_index_path)
+# return None
+
+# WGET_EXTRACTOR = WgetExtractor()
+
+
+# class WarcExtractor(BaseExtractor):
+# name: ExtractorName = 'warc'
+# binary: BinName = WGET_BINARY.name
+
+# def get_output_path(self, snapshot) -> Path | None:
+# warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
+# if warc_files:
+# return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
+# return None
+
+
+# WARC_EXTRACTOR = WarcExtractor()
+
diff --git a/archivebox/extractors/wget.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py
similarity index 97%
rename from archivebox/extractors/wget.py
rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py
index 416e797e..caaaeaf6 100644
--- a/archivebox/extractors/wget.py
+++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py
@@ -1,4 +1,4 @@
-__package__ = 'archivebox.extractors'
+__package__ = 'abx_plugin_wget'
import re
import os
@@ -17,10 +17,11 @@ from archivebox.misc.util import (
urldecode,
dedupe,
)
-from archivebox.plugins_extractor.wget.config import WGET_CONFIG
-from archivebox.plugins_extractor.wget.binaries import WGET_BINARY
-from ..logging_util import TimedProgress
-from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
+from .config import WGET_CONFIG
+from .binaries import WGET_BINARY
+
+from archivebox.logging_util import TimedProgress
+from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError
def get_output_path():
diff --git a/archivebox/plugins_extractor/wget/wget_util.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget_util.py
similarity index 100%
rename from archivebox/plugins_extractor/wget/wget_util.py
rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget_util.py
diff --git a/archivebox/pkgs/abx-plugin-wget/pyproject.toml b/archivebox/pkgs/abx-plugin-wget/pyproject.toml
new file mode 100644
index 00000000..d401e52f
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-wget/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-plugin-wget"
+version = "2024.10.28"
+description = "Wget + WARC extractor plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_wget = "abx_plugin_wget"
diff --git a/archivebox/pkgs/abx-plugin-ytdlp/README.md b/archivebox/pkgs/abx-plugin-ytdlp/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/plugins_extractor/ytdlp/__init__.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py
similarity index 53%
rename from archivebox/plugins_extractor/ytdlp/__init__.py
rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py
index 1dc9ef99..5b1d9968 100644
--- a/archivebox/plugins_extractor/ytdlp/__init__.py
+++ b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py
@@ -1,30 +1,15 @@
-__package__ = 'plugins_extractor.ytdlp'
+__package__ = 'abx_plugin_ytdlp'
__label__ = 'YT-DLP'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/yt-dlp/yt-dlp'
import abx
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'ytdlp': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- }
- }
-
@abx.hookimpl
def get_CONFIG():
from .config import YTDLP_CONFIG
return {
- 'ytdlp': YTDLP_CONFIG
+ 'YTDLP_CONFIG': YTDLP_CONFIG
}
@abx.hookimpl
diff --git a/archivebox/plugins_extractor/ytdlp/binaries.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py
similarity index 77%
rename from archivebox/plugins_extractor/ytdlp/binaries.py
rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py
index 730de2dc..69239515 100644
--- a/archivebox/plugins_extractor/ytdlp/binaries.py
+++ b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py
@@ -1,26 +1,25 @@
-__package__ = 'plugins_extractor.ytdlp'
+__package__ = 'abx_plugin_ytdlp'
import subprocess
from typing import List
from pydantic import InstanceOf
-from pydantic_pkgr import BinProvider, BinName, BinaryOverrides
+from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, Binary
-from abx.archivebox.base_binary import BaseBinary, env, apt, brew
-
-from plugins_pkg.pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
+from abx_plugin_default_binproviders import apt, brew, env
+from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
from .config import YTDLP_CONFIG
-class YtdlpBinary(BaseBinary):
+class YtdlpBinary(Binary):
name: BinName = YTDLP_CONFIG.YTDLP_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
YTDLP_BINARY = YtdlpBinary()
-class FfmpegBinary(BaseBinary):
+class FfmpegBinary(Binary):
name: BinName = 'ffmpeg'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
diff --git a/archivebox/plugins_extractor/ytdlp/config.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py
similarity index 97%
rename from archivebox/plugins_extractor/ytdlp/config.py
rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py
index 0082df3d..b36d19d1 100644
--- a/archivebox/plugins_extractor/ytdlp/config.py
+++ b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py
@@ -4,7 +4,7 @@ from typing import List
from pydantic import Field, AliasChoices
-from abx.archivebox.base_configset import BaseConfigSet
+from abx_spec_config.base_configset import BaseConfigSet
from archivebox.config.common import ARCHIVING_CONFIG
from archivebox.misc.logging import STDERR
diff --git a/archivebox/extractors/media.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py
similarity index 100%
rename from archivebox/extractors/media.py
rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py
diff --git a/archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml b/archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml
new file mode 100644
index 00000000..b45626bd
--- /dev/null
+++ b/archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml
@@ -0,0 +1,19 @@
+[project]
+name = "abx-plugin-ytdlp"
+version = "2024.10.28"
+description = "YT-DLP audio/video extractor plugin for ArchiveBox"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_plugin_ytdlp = "abx_plugin_ytdlp"
diff --git a/archivebox/pkgs/abx-spec-archivebox/README.md b/archivebox/pkgs/abx-spec-archivebox/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py
new file mode 100644
index 00000000..ab591c96
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py
@@ -0,0 +1,28 @@
+__package__ = 'abx_spec_archivebox'
+__order__ = 400
+
+# from .effects import *
+# from .events import *
+# from .reads import *
+# from .writes import *
+# from .states import *
+
+from typing import cast
+
+import abx
+from abx_spec_config import ConfigPluginSpec
+from abx_spec_pydantic_pkgr import PydanticPkgrPluginSpec
+from abx_spec_django import DjangoPluginSpec
+from abx_spec_searchbackend import SearchBackendPluginSpec
+
+class ArchiveBoxPluginSpec(ConfigPluginSpec, PydanticPkgrPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
+ """
+    ArchiveBox plugins can use any of the hooks from the Config, PydanticPkgr, Django, and SearchBackend plugin specs.
+ """
+ pass
+
+PLUGIN_SPEC = ArchiveBoxPluginSpec
+
+
+TypedPluginManager = abx.ABXPluginManager[ArchiveBoxPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
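+
+# Hypothetical usage sketch: downstream code imports this typed manager to get
+# static type checking on hook calls (hooks inherited from the four specs above):
+#
+#   from abx_spec_archivebox import pm
+#   FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()   # checked against ArchiveBoxPluginSpec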
diff --git a/archivebox/abx/archivebox/effects.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/effects.py
similarity index 100%
rename from archivebox/abx/archivebox/effects.py
rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/effects.py
diff --git a/archivebox/abx/archivebox/events.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/events.py
similarity index 100%
rename from archivebox/abx/archivebox/events.py
rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/events.py
diff --git a/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/reads.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/reads.py
new file mode 100644
index 00000000..30d6667d
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/reads.py
@@ -0,0 +1,33 @@
+__package__ = 'abx_spec_archivebox'
+
+
+from benedict import benedict
+
+
+def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
+ """Get all the relevant config for the given scope, in correct precedence order"""
+
+ from django.conf import settings
+ default_config: benedict = defaults or settings.CONFIG
+
+ snapshot = snapshot or (archiveresult and archiveresult.snapshot)
+ crawl = crawl or (snapshot and snapshot.crawl)
+ seed = seed or (crawl and crawl.seed)
+ persona = persona or (crawl and crawl.persona)
+
+ persona_config = persona.config if persona else {}
+ seed_config = seed.config if seed else {}
+ crawl_config = crawl.config if crawl else {}
+ snapshot_config = snapshot.config if snapshot else {}
+ archiveresult_config = archiveresult.config if archiveresult else {}
+ extra_config = extra_config or {}
+
+ return benedict({
+ **default_config, # defaults / config file / environment variables
+ **persona_config, # lowest precedence
+ **seed_config,
+ **crawl_config,
+ **snapshot_config,
+ **archiveresult_config,
+ **extra_config, # highest precedence
+ })
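+
+# Illustrative example of the precedence order above (values are hypothetical):
+#
+#   defaults        = {'TIMEOUT': 60, 'CHECK_SSL_VALIDITY': True}
+#   crawl.config    = {'TIMEOUT': 120}
+#   snapshot.config = {'CHECK_SSL_VALIDITY': False}
+#
+#   get_scope_config(defaults=defaults, crawl=crawl, snapshot=snapshot)
+#   # -> {'TIMEOUT': 120, 'CHECK_SSL_VALIDITY': False, ...}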
diff --git a/archivebox/abx/archivebox/states.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py
similarity index 61%
rename from archivebox/abx/archivebox/states.py
rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py
index 15d06f61..05284f37 100644
--- a/archivebox/abx/archivebox/states.py
+++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py
@@ -20,11 +20,130 @@ from django.urls import reverse_lazy
from pathlib import Path
+# Glossary:
+# - startup: when a new process is spawned
+# - shutdown: when a process is exiting
+# - start: at the beginning of some python code block
+# - end: at the end of some python code block
+# - queue: a django queryset of objects of a single type that are waiting to be processed
+# - actor: a long-running daemon process that wakes up and processes a single object from a queue at a time
+# - plugin: a python package that defines some hookimpls based on hookspecs exposed by ABX
+# - object: an instance of a django model that represents a single row in the database
+# ORCHESTRATOR:
+# An orchestrator is a single long-running daemon process that manages spawning and killing actors for different queues of objects.
+# The orchestrator starts when ArchiveBox starts, and it stops when the ArchiveBox process is killed.
+# Only one orchestrator process can be running per collection per machine.
+# An orchestrator is aware of all of the ActorTypes that are defined in the system, and their associated queues.
+# When started, the orchestrator runs a single runloop that continues until the archivebox process is killed.
+# On each loop, the orchestrator:
+# - loops through each ActorType defined in the system:
+# - fetches the queue of objects pending for that ActorType by calling ActorType.get_queue()
+#      - checks how many actors are currently running for that ActorType by calling current_actors = ActorType.get_running_actors()
+#      - determines how many new actors are needed and what their launch kwargs should be to process the objects in each queue
+# actors_to_spawn = ActorType.get_actors_to_spawn(queue, current_actors)
+#        - e.g. if there are 4 ArchiveResult objects queued all with the same persona + extractor, it should spawn a single actor to process all of them; if there are 4000 it should spawn ~5 actors
+# - if there are 4 ArchiveResult objects queued with different personas + extractors, it should spawn a single actor for each persona + extractor
+# - if there are *many* objects to process, it can spawn more actors of the same type up to ActorType.MAX_ACTORS to speed things up
+#    - spawns the new actors needed as subprocesses: ActorType.spawn_actors(actors_to_spawn, block=False, double_fork=False)
+# - checks for ANY objects in the DB that have a retry_at time set but where no ActorType has them in their queue, and raises a warning that they are orphaned and will never be processed
+# - sleeps for 0.1s before repeating the loop, to reduce the CPU load
+# The orchestrator does not manage killing actors, actors are expected to exit on their own when idle.
+# ABX defines the following hookspecs for plugins to hook into the orchestrator lifecycle:
+# - abx.pm.hook.on_orchestrator_startup(all_actor_types)
+# - abx.pm.hook.on_orchestrator_tick_started(all_actor_types, all_queues, all_running_actors)
+# - abx.pm.hook.on_orchestrator_idle(all_actor_types) # only run when there are no queues with pending objects to process
+# - abx.pm.hook.on_orchestrator_shutdown(all_actor_types)
+# OBJECT:
+# e.g. Snapshot, Crawl, ArchiveResult
+# An object is a single row in a database table, defined by a django model.
+# An object has a finite set of states that it can be in.
+# An object has a status field that holds the object's current state, e.g. status="queued".
+# An object has a retry_at field that holds a timestamp for when it should next be checked by an actor event loop.
+# Each type of object has a single tick() method defined that handles all of its state transitions.
+# When an object's retry_at time has passed, the actor managing that type of object will spawn an actor and call tick(object) to move it to its next state.
+# ABX defines the following hookspecs for plugins to hook into object lifecycle: # use these for in-memory operations, dont use these for db on_create/on_update/on_delete logic, separate hooks are available on write operations below
+# - abx.pm.hook.on__init(object) # when object is initialized in-memory, don't put any slow code here as it runs on every object returned from DB queries! only for setting default values, ._cache_attrs, etc.
+# - abx.pm.hook.on__clean(object) # when object's form fields are validated but before it is to be saved to the DB, put any checks/validations on field values here
+# - abx.pm.hook.on__save(object) # when object is being saved to the DB, put any code here that should run right before super().save()
+# ACTORS:
+# An actor is a long-running daemon process that runs a loop to process a single object at a time from a queue it defines (e.g. ActorType.queue=Snapshot.objects.filter(status='queued', retry_at__lte=time.now())).
+# An actor at runtime is an instance of an ActorType class + some launch kwargs that it's passed at startup (e.g. persona, extractor, etc.).
+# Actors are started lazily by the orchestrator only when their ActorType.queue indicates there are pending objects to process.
+# ActorTypes should define ActorType.get_queue(), ActorType.get_actors_to_spawn(), ActorType.get_running_actors(), and ActorType.spawn_actors() methods exposed to the orchestrator.
+# On startup, an actor can initialize shared resources it needs to perform its work, and keep a reference in memory to them. (e.g. launch chrome in the background, set up an API client, etc.)
+# On each loop, the actor gets a single object to process from the top of the queue, and runs ActorType.tick(object).
+# The actor should have a hardcoded ActorType.MAX_TICK_TIME, and should enforce it by killing the tick() method if it runs too long.
+# Before calling tick(), an actor should bump the object.retry_at time by MAX_TICK_TIME to prevent other actors from picking it up while the current actor is still processing it.
+# The actor blocks waiting for tick(obj) to finish executing, then the loop repeats and it gets the next object to call tick(object) on.
+# If a tick(obj) method raises an exception, the actor should catch it and log it, then move on to the next object in the queue.
+# If there are no objects left in the queue, the actor should exit.
+# On exit, an actor should release any shared resources it initialized on startup and clean up after itself.
+# On startup, an actor should fire abx.pm.hook.on_actor_startup(object) and on exit it should fire abx.pm.hook.on_actor_exit(object) (both synchronous hooks that can be used by plugins to register any startup/cleanup code).
+# An ActorType defines the following hookspecs for plugins to hook into its behavior:
+# - abx.pm.hook.on_actor_startup(actor, queue)
+# - abx.pm.hook.on_actor_tick_start(actor, object)
+# - abx.pm.hook.on_actor_tick_end(actor, object)
+# - abx.pm.hook.on_actor_tick_exception(actor, object, exception)
+# - abx.pm.hook.on_actor_shutdown(actor)
+# TICK:
+# A tick() method is a method defined on an ActorType, passed a single object to process and perform a single state transition on.
+# A tick() method does NOT need to lock the object it's operating on, the actor will bump the object's retry_at += MAX_TICK_TIME before handing it off to tick().
+# A tick() method does NOT open a DB transaction for its entire duration of execution, instead it should do all its writes in one atomic operation using a compare-and-swap .filter(status=previous_state).update(status=next_state) (optimistic concurrency control).
+# A tick() method does NOT return any values, it either succeeds and returns None, or fails and raises an exception to be handled by the actor runloop.
+# A tick() method does NOT need to enforce its own MAX_TICK_TIME / any timeouts, the actor runloop code should enforce that.
+# A tick() should NOT call other tick() methods directly, and it should not spawn orchestrator or actor processes.
+# A tick() should set its object.retry_at time to a value farther in the future and return early if it wants to skip execution due to hitting a ratelimit or transient error.
+# A tick() can:
+# - read from any other objects, filesystem, or external APIs (e.g. check if snapshot_dir/screenshot.png exists)
+#    - perform any checks necessary and branch to determine which transition it should perform to reach its next state
+# - execute a single transition_from_abx_to_xyz(object) method to perform the transition to the next state it decided on
+
+# TRANSITION:
+# A transition_from_abx_to_xyz(object) method is a function defined on an ActorType, passed a single object by a tick() method to perform a defined transition on.
+# A transition_from_abx_to_xyz() method does NOT need to lock the object it's operating on or open any db transactions.
+# A transition should not have any branching logic, it should only execute the given transition that it defines + any side effects.
+# A transition should be idempotent: if two transitions run at once on the same object, only one should succeed and the other should fail.
+# A transition should be atomic, if it is interrupted it should leave the object in a consistent state
+# A transition's main body should:
+# - perform a SINGLE write() to the underlying object using a compare_and_swap .filter(status=last_state).update(status=next_state) to move it to its next state
+# - update the object's retry_at time to a new value, or set it to None if it's in a final state & should not be checked again
+# A transition can also trigger side effects at the end of its execution:
+# - update the retry_at time on *other* objects (so that they are rechecked by their own actor on the next tick) (ONLY retry_at, do not update any other fields)
+# - filesystem operations (e.g. moving a directory to a new location)
+# - external API calls (e.g. uploading to s3, firing a webhook, writing to a logfile, etc.)
+# - DO NOT use side effects to directly mutate other objects state or trigger other state transitions
+# ABX defines the following hookspecs for plugins to hook into transition behavior:
+# - abx.pm.hook.on_transition__from_abx_to_xyz_start(object)
+# - abx.pm.hook.on_transition__from_abx_to_xyz_end(object)
+
+# READ:
+# A read() method is a function defined for a given ActorType that performs a single read from the DB and/or other read models like django cache, filesystem, in-memory caches, etc.
+# A read() method should accept either an instance/pk/uuid/abid or some filter_kwargs, and return a benedict/TypedDict or pydantic model containing bare values as the result.
+
+# WRITE:
+# A write() method is a function defined for a given ActorType that performs a single atomic db write to update the DB, django cache, filesystem, in-memory caches, etc. for that object.
+# A write() method does NOT need to lock the object it's operating on or open any db transactions, it should just perform a single compare-and-swap .filter(status=last_state).update(status=next_state) operation.
+# A write() method does NOT need to enforce any timeouts or ratelimits, the tick() method should do that.
+# A write() method should NOT have any branching logic or side effects like spawning other processes.
+# ABX defines the following hookspecs for plugins to hook into write behavior:
+# - abx.pm.hook.on__created(object)
+# - abx.pm.hook.on__updated(object)
+# - abx.pm.hook.on__deleted(object)
+
+# SIDEEFFECT:
+# A sideeffect is a helper function defined in an app to be used by one or more tick() methods to perform a side effect that isn't a simple DB write or read.
+# A sideeffect can spawn other processes, make 3rd-party API calls, write to the filesystem, etc. e.g. subprocess.Popen('wget https://example.com')
+# A sideeffect should execute quickly and return early, it should try not to block for slow RPCs, subprocess jobs, or network operations.
+# For slow or long-running sideeffects, spawn a separate background process and return immediately. Update the object's retry_at time and state as-needed so that a future tick() will check for any expected output from the background job.
+# ABX defines the following hookspecs for plugins to hook into sideeffect behavior:
+# - abx.pm.hook.on_sideeffect_xyz_started(object)
+# - abx.pm.hook.on_sideeffect_xyz_succeeded(object)
+# - abx.pm.hook.on_sideeffect_xyz_failed(object)
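+
+# Minimal illustrative sketch of the compare-and-swap write described above
+# (hypothetical model and fields, not a drop-in implementation):
+#
+#   def transition_snapshot_from_queued_to_started(snapshot):
+#       claimed = Snapshot.objects.filter(
+#           id=snapshot.id, status='queued',            # compare
+#       ).update(status='started', retry_at=None)       # swap, one atomic UPDATE
+#       if claimed != 1:
+#           raise Exception('lost race: another actor claimed this object first')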
@@ -99,6 +218,7 @@ def transition_snapshot_to_started(snapshot, config, cwd):
fields_to_update = {'status': 'started', 'retry_at': retry_at, 'retries': retries, 'start_ts': time.now(), 'end_ts': None}
snapshot = abx.archivebox.writes.update_snapshot(filter_kwargs=snapshot_to_update, update_kwargs=fields_to_update)
+    # trigger side effects on state transition (these just emit an event to a separate queue that's then processed by a huey worker)
cleanup_snapshot_dir(snapshot, config, cwd)
create_snapshot_pending_archiveresults(snapshot, config, cwd)
update_snapshot_index_json(snapshot, config, cwd)
@@ -114,6 +234,7 @@ def transition_snapshot_to_sealed(snapshot, config, cwd):
fields_to_update = {'status': 'sealed', 'retry_at': None, 'end_ts': time.now()}
snapshot = abx.archivebox.writes.update_snapshot(filter_kwargs=snapshot_to_update, update_kwargs=fields_to_update)
+ # side effects:
cleanup_snapshot_dir(snapshot, config, cwd)
update_snapshot_index_json(snapshot, config, cwd)
update_snapshot_index_html(snapshot, config, cwd)
@@ -225,7 +346,7 @@ def transition_archiveresult_to_started(archiveresult, config, cwd):
fields_to_update = {'status': 'started', 'retry_at': retry_at, 'retries': retries, 'start_ts': time.now(), 'output': None, 'error': None}
archiveresult = abx.archivebox.writes.update_archiveresult(filter=archiveresult_to_update, update=fields_to_update)
-
+ # side effects:
with TimedProgress():
try:
from .extractors import WARC_EXTRACTOR
@@ -334,7 +455,7 @@ def on_crawl_created(crawl):
@abx.hookimpl
def on_snapshot_created(snapshot, config):
- create_archiveresults_pending_from_snapshot(snapshot, config)
+ create_snapshot_pending_archiveresults(snapshot, config)
# events
@abx.hookimpl
@@ -361,7 +482,7 @@ def scheduler_runloop():
try:
abx.archivebox.events.on_crawl_schedule_tick(scheduled_crawl)
except Exception as e:
- abx.archivebox.events.on_crawl_schedule_failure(timezone.now(), machine=Machine.objects.get_current_machine(), error=e, schedule=scheduled_crawl)
+ abx.archivebox.events.on_crawl_schedule_tick_failure(timezone.now(), machine=Machine.objects.get_current_machine(), error=e, schedule=scheduled_crawl)
# abx.archivebox.events.on_scheduler_tick_end(timezone.now(), machine=Machine.objects.get_current_machine(), tasks=scheduled_tasks_due)
time.sleep(1)
@@ -420,7 +541,7 @@ def create_root_snapshot(crawl):
abx.archivebox.writes.update_crawl_stats(started_at=timezone.now())
-def create_archiveresults_pending_from_snapshot(snapshot, config):
+def create_snapshot_pending_archiveresults(snapshot, config):
config = get_scope_config(
# defaults=settings.CONFIG_FROM_DEFAULTS,
# configfile=settings.CONFIG_FROM_FILE,
diff --git a/archivebox/abx/archivebox/writes.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/writes.py
similarity index 98%
rename from archivebox/abx/archivebox/writes.py
rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/writes.py
index 0c4566b4..977543d2 100644
--- a/archivebox/abx/archivebox/writes.py
+++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/writes.py
@@ -8,7 +8,6 @@ from benedict import benedict
from django.conf import settings
import abx
-from .. import pm
@abx.hookimpl
@@ -88,7 +87,7 @@ def create_root_snapshot_from_seed(crawl):
def create_archiveresults_pending_from_snapshot(snapshot, config):
config = get_scope_config(
# defaults=settings.CONFIG_FROM_DEFAULTS,
- # configfile=settings.CONFIG_FROM_FILE,
+ # collection=settings.CONFIG_FROM_FILE,
# environment=settings.CONFIG_FROM_ENVIRONMENT,
persona=archiveresult.snapshot.crawl.persona,
seed=archiveresult.snapshot.crawl.seed,
diff --git a/archivebox/pkgs/abx-spec-archivebox/pyproject.toml b/archivebox/pkgs/abx-spec-archivebox/pyproject.toml
new file mode 100644
index 00000000..349698a7
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-archivebox/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-spec-archivebox"
+version = "0.1.0"
+description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "django>=5.1.1,<6.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_archivebox = "abx_spec_archivebox"
diff --git a/archivebox/pkgs/abx-spec-config/README.md b/archivebox/pkgs/abx-spec-config/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py
new file mode 100644
index 00000000..6aeedb71
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py
@@ -0,0 +1,151 @@
+__order__ = 100
+
+import os
+from pathlib import Path
+from typing import Dict, Any, cast
+
+from benedict import benedict
+
+
+import abx
+
+from .base_configset import BaseConfigSet, ConfigKeyStr
+
+
+class ConfigPluginSpec:
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_collection_config_path(self) -> Path:
+ return Path(os.getcwd()) / "ArchiveBox.conf"
+
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_system_config_path(self) -> Path:
+ return Path('~/.config/abx/abx.conf').expanduser()
+
+
+ @abx.hookspec
+ @abx.hookimpl
+ def get_CONFIG(self) -> Dict[abx.PluginId, BaseConfigSet]:
+ """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
+ return {
+ # override this in your plugin to return your plugin's config, e.g.
+ # 'ytdlp': YtdlpConfig(...),
+ }
+
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_CONFIGS(self) -> Dict[abx.PluginId, BaseConfigSet]:
+ """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
+ return abx.as_dict(pm.hook.get_CONFIG())
+
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_FLAT_CONFIG(self) -> Dict[ConfigKeyStr, Any]:
+ """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
+ return benedict({
+ key: value
+ for configset in pm.hook.get_CONFIGS().values()
+ for key, value in benedict(configset).items()
+ })
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_SCOPE_CONFIG(self, extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> Dict[ConfigKeyStr, Any]:
+ """Get the config as it applies to you right now, based on the current context"""
+ return benedict({
+ **pm.hook.get_default_config(default=default),
+ # **pm.hook.get_machine_config(machine),
+ **pm.hook.get_environment_config(environment=environment),
+ **pm.hook.get_collection_config(collection=collection),
+ **pm.hook.get_user_config(user=user),
+ **pm.hook.get_crawl_config(crawl=crawl),
+ **pm.hook.get_snapshot_config(snapshot=snapshot),
+ **pm.hook.get_archiveresult_config(archiveresult=archiveresult),
+ # **pm.hook.get_request_config(request=request),
+ **(extra or {}),
+ })
+
+ # @abx.hookspec(firstresult=True)
+ # @abx.hookimpl
+ # def get_request_config(self, request) -> dict:
+ # session = getattr(request, 'session', None)
+ # return getattr(session, 'config', None) or {}
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_archiveresult_config(self, archiveresult) -> Dict[ConfigKeyStr, Any]:
+ return getattr(archiveresult, 'config', None) or {}
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_snapshot_config(self, snapshot) -> Dict[ConfigKeyStr, Any]:
+ return getattr(snapshot, 'config', None) or {}
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_crawl_config(self, crawl) -> Dict[ConfigKeyStr, Any]:
+ return getattr(crawl, 'config', None) or {}
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_user_config(self, user=None) -> Dict[ConfigKeyStr, Any]:
+ return getattr(user, 'config', None) or {}
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_collection_config(self, collection=...) -> Dict[ConfigKeyStr, Any]:
+ # ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file
+ # {} = empty dict, override to ignore the collection config
+ return benedict({
+ key: value
+ for configset in pm.hook.get_CONFIGS().values()
+ for key, value in configset.from_collection().items()
+ }) if collection == ... else collection
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_environment_config(self, environment=...) -> Dict[ConfigKeyStr, Any]:
+ # ... = ellipsis, means automatically get the environment config from the active environment variables
+ # {} = empty dict, override to ignore the environment config
+ return benedict({
+ key: value
+ for configset in pm.hook.get_CONFIGS().values()
+ for key, value in configset.from_environment().items()
+ }) if environment == ... else environment
+
+ # @abx.hookspec(firstresult=True)
+ # @abx.hookimpl
+ # def get_machine_config(self, machine=...) -> dict:
+ # # ... = ellipsis, means automatically get the machine config from the currently executing machine
+ # # {} = empty dict, override to ignore the machine config
+ # if machine == ...:
+ # machine = Machine.objects.get_current()
+ # return getattr(machine, 'config', None) or {}
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_default_config(self, default=...) -> Dict[ConfigKeyStr, Any]:
+ # ... = ellipsis, means automatically get the default config from the configset schema defaults
+ # {} = empty dict, override to ignore the default config
+ return benedict({
+ key: value
+ for configset in pm.hook.get_CONFIGS().values()
+ for key, value in configset.from_defaults().items()
+ }) if default == ... else default
+
+
+ # TODO: add read_config_file(), write_config_file() hooks
+
+
+PLUGIN_SPEC = ConfigPluginSpec
+
+
+class ExpectedPluginSpec(ConfigPluginSpec):
+ pass
+
+TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
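For reference, a downstream plugin satisfies this spec by returning its own configset from a get_CONFIG hookimpl. A minimal sketch, assuming the spec above is already registered with abx.pm (the ytdlp plugin and its keys are hypothetical):

    import abx
    from abx_spec_config.base_configset import BaseConfigSet

    class YtdlpConfig(BaseConfigSet):
        USE_YTDLP: bool = True       # hypothetical example keys
        YTDLP_TIMEOUT: int = 120

    YTDLP_CONFIG = YtdlpConfig()

    @abx.hookimpl
    def get_CONFIG():
        # merged into get_CONFIGS() / get_FLAT_CONFIG() by the spec above
        return {'ytdlp': YTDLP_CONFIG}

Once the plugin is registered, pm.hook.get_FLAT_CONFIG()['YTDLP_TIMEOUT'] resolves through the merged view, and get_SCOPE_CONFIG() further layers crawl/snapshot/archiveresult overrides on top.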
diff --git a/archivebox/abx/archivebox/base_configset.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py
similarity index 72%
rename from archivebox/abx/archivebox/base_configset.py
rename to archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py
index 3a6695a1..434db331 100644
--- a/archivebox/abx/archivebox/base_configset.py
+++ b/archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py
@@ -1,36 +1,32 @@
-__package__ = 'abx.archivebox'
+__package__ = 'abx_spec_config'
import os
import sys
import re
from pathlib import Path
from typing import Type, Tuple, Callable, ClassVar, Dict, Any
+from typing_extensions import Annotated
import toml
from rich import print
from benedict import benedict
-from pydantic import model_validator, TypeAdapter, AliasChoices
+from pydantic import model_validator, TypeAdapter, AliasChoices, AfterValidator
from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource
from pydantic_settings.sources import TomlConfigSettingsSource
-from pydantic_pkgr import func_takes_args_or_kwargs
-
+import abx
from . import toml_util
-PACKAGE_DIR = Path(__file__).resolve().parent.parent
-DATA_DIR = Path(os.getcwd()).resolve()
-
-ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf"
-ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak"
-
AUTOFIXES_HEADER = "[AUTOFIXES]"
AUTOFIXES_SUBHEADER = "# The following config was added automatically to fix problems detected at startup:"
_ALREADY_WARNED_ABOUT_UPDATED_CONFIG = set()
+def is_valid_config_key(x: str) -> str:
+ # AfterValidator must return the validated value, not a bool, or the key gets replaced by True/False
+ assert x.isidentifier() and x.isupper() and not x.startswith('_')
+ return x
+
+ConfigKeyStr = Annotated[str, AfterValidator(is_valid_config_key)]
+
class FlatTomlConfigSettingsSource(TomlConfigSettingsSource):
"""
@@ -98,9 +94,10 @@ class BaseConfigSet(BaseSettings):
revalidate_instances="subclass-instances",
)
- load_from_defaults: ClassVar[bool] = True
- load_from_configfile: ClassVar[bool] = True
- load_from_environment: ClassVar[bool] = True
+ load_from_defaults: ClassVar[bool] = True # read from schema defaults
+ load_from_system: ClassVar[bool] = True # read from ~/.config/abx/abx.conf
+ load_from_collection: ClassVar[bool] = True # read from ./ArchiveBox.conf
+ load_from_environment: ClassVar[bool] = True # read from environment variables
@classmethod
def settings_customise_sources(
@@ -115,47 +112,41 @@ class BaseConfigSet(BaseSettings):
# import ipdb; ipdb.set_trace()
- precedence_order = {}
+ default_configs = [init_settings] if cls.load_from_defaults else []
+ system_configs = []
+ collection_configs = []
+ environment_configs = [env_settings] if cls.load_from_environment else []
- # if ArchiveBox.conf does not exist yet, return defaults -> env order
- if not ARCHIVEBOX_CONFIG_FILE.is_file():
- precedence_order = {
- 'defaults': init_settings,
- 'environment': env_settings,
- }
+ # load system config from ~/.config/abx/abx.conf
+ SYSTEM_CONFIG_FILE = abx.pm.hook.get_system_config_path()
+ if cls.load_from_system and os.path.isfile(SYSTEM_CONFIG_FILE):
+ try:
+ system_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=SYSTEM_CONFIG_FILE)]
+ except Exception as err:
+ if err.__class__.__name__ == "TOMLDecodeError":
+ convert_ini_to_toml(SYSTEM_CONFIG_FILE)
+ system_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=SYSTEM_CONFIG_FILE)]
+ else:
+ raise
+
+ COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path()
+ if cls.load_from_collection and os.path.isfile(COLLECTION_CONFIG_FILE):
+ try:
+ collection_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=COLLECTION_CONFIG_FILE)]
+ except Exception as err:
+ if err.__class__.__name__ == "TOMLDecodeError":
+ convert_ini_to_toml(COLLECTION_CONFIG_FILE)
+ collection_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=COLLECTION_CONFIG_FILE)]
+ else:
+ raise
- # if ArchiveBox.conf exists and is in TOML format, return default -> TOML -> env order
- try:
- precedence_order = precedence_order or {
- 'defaults': init_settings,
- 'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
- 'environment': env_settings,
- }
- except Exception as err:
- if err.__class__.__name__ != "TOMLDecodeError":
- raise
- # if ArchiveBox.conf exists and is in INI format, convert it then return default -> TOML -> env order
-
- # Convert ArchiveBox.conf in INI format to TOML and save original to .ArchiveBox.bak
- original_ini = ARCHIVEBOX_CONFIG_FILE.read_text()
- ARCHIVEBOX_CONFIG_FILE_BAK.write_text(original_ini)
- new_toml = toml_util.convert(original_ini)
- ARCHIVEBOX_CONFIG_FILE.write_text(new_toml)
-
- precedence_order = {
- 'defaults': init_settings,
- 'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE),
- 'environment': env_settings,
- }
-
- if not cls.load_from_environment:
- precedence_order.pop('environment')
- if not cls.load_from_configfile:
- precedence_order.pop('configfile')
- if not cls.load_from_defaults:
- precedence_order.pop('defaults')
-
- return tuple(precedence_order.values())
+ precedence_order = [
+ *default_configs,
+ *system_configs,
+ *collection_configs,
+ *environment_configs,
+ ]
+ return tuple(precedence_order)
@model_validator(mode="after")
def fill_defaults(self):
@@ -173,7 +164,7 @@ class BaseConfigSet(BaseSettings):
"""Manual validation method, to be called from plugin/__init__.py:get_CONFIG()"""
pass
- def get_default_value(self, key):
+ def get_default_value(self, key: ConfigKeyStr):
"""Get the default value for a given config key"""
field = self.model_fields[key]
value = getattr(self, key)
@@ -202,7 +193,9 @@ class BaseConfigSet(BaseSettings):
Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it.
SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue.
"""
- from archivebox.misc.toml_util import CustomTOMLEncoder
+
+ COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path()
+ # SYSTEM_CONFIG_FILE = abx.pm.hook.get_system_config_path()
# silence warnings if they've already been shown once
if all(key in _ALREADY_WARNED_ABOUT_UPDATED_CONFIG for key in kwargs.keys()):
@@ -222,10 +215,10 @@ class BaseConfigSet(BaseSettings):
# if persist=True, write config changes to data/ArchiveBox.conf [AUTOFIXES] section
try:
- if persist and ARCHIVEBOX_CONFIG_FILE.is_file():
- autofixes_to_add = benedict(kwargs).to_toml(encoder=CustomTOMLEncoder())
+ if persist and COLLECTION_CONFIG_FILE.is_file():
+ autofixes_to_add = benedict(kwargs).to_toml(encoder=toml_util.CustomTOMLEncoder())
- existing_config = ARCHIVEBOX_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip()
+ existing_config = COLLECTION_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip()
if AUTOFIXES_HEADER in existing_config:
existing_autofixes = existing_config.split(AUTOFIXES_HEADER, 1)[-1].strip().replace(AUTOFIXES_SUBHEADER, '').replace(AUTOFIXES_HEADER, '').strip()
else:
@@ -238,7 +231,7 @@ class BaseConfigSet(BaseSettings):
existing_autofixes,
autofixes_to_add,
] if line.strip()).strip() + '\n'
- ARCHIVEBOX_CONFIG_FILE.write_text(new_config)
+ COLLECTION_CONFIG_FILE.write_text(new_config)
except Exception:
pass
self.__init__()
@@ -248,7 +241,7 @@ class BaseConfigSet(BaseSettings):
return self
@property
- def aliases(self) -> Dict[str, str]:
+ def aliases(self) -> Dict[ConfigKeyStr, ConfigKeyStr]:
alias_map = {}
for key, field in self.model_fields.items():
alias_map[key] = key
@@ -274,36 +267,36 @@ class BaseConfigSet(BaseSettings):
return re.sub('([A-Z]+)', r'_\1', class_name).upper().strip('_')
- def from_defaults(self) -> Dict[str, Any]:
+ def from_defaults(self) -> Dict[ConfigKeyStr, Any]:
"""Get the dictionary of {key: value} config loaded from the default values"""
class OnlyDefaultsConfig(self.__class__):
load_from_defaults = True
- load_from_configfile = False
+ load_from_collection = False
load_from_environment = False
return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
- def from_configfile(self) -> Dict[str, Any]:
- """Get the dictionary of {key: value} config loaded from the configfile ArchiveBox.conf"""
+ def from_collection(self) -> Dict[ConfigKeyStr, Any]:
+ """Get the dictionary of {key: value} config loaded from the collection ArchiveBox.conf"""
class OnlyConfigFileConfig(self.__class__):
load_from_defaults = False
- load_from_configfile = True
+ load_from_collection = True
load_from_environment = False
return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
- def from_environment(self) -> Dict[str, Any]:
+ def from_environment(self) -> Dict[ConfigKeyStr, Any]:
"""Get the dictionary of {key: value} config loaded from the environment variables"""
class OnlyEnvironmentConfig(self.__class__):
load_from_defaults = False
- load_from_configfile = False
+ load_from_collection = False
load_from_environment = True
return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys())))
- def from_computed(self) -> Dict[str, Any]:
+ def from_computed(self) -> Dict[ConfigKeyStr, Any]:
"""Get the dictionary of {key: value} config loaded from the computed fields"""
return benedict(self.model_dump(include=set(self.model_computed_fields.keys())))
- def to_toml_dict(self, defaults=False) -> Dict[str, Any]:
+ def to_toml_dict(self, defaults=False) -> Dict[ConfigKeyStr, Any]:
"""Get the current config as a TOML-ready dict"""
config_dict = {}
for key, value in benedict(self).items():
@@ -323,10 +316,24 @@ class BaseConfigSet(BaseSettings):
return toml.dumps(toml_dict, encoder=CustomTOMLEncoder())
- def as_legacy_config_schema(self) -> Dict[str, Any]:
- # shim for backwards compatibility with old config schema style
- model_values = self.model_dump()
- return benedict({
- key: {'type': field.annotation, 'default': model_values[key]}
- for key, field in self.model_fields.items()
- })
+
+
+def func_takes_args_or_kwargs(lambda_func: Callable[..., Any]) -> bool:
+ """returns True if a lambda func takes args/kwargs of any kind, otherwise false if it's pure/argless"""
+ code = lambda_func.__code__
+ has_args = code.co_argcount > 0
+ has_varargs = code.co_flags & 0x04 != 0
+ has_varkw = code.co_flags & 0x08 != 0
+ return has_args or has_varargs or has_varkw
+
+
+
+
+def convert_ini_to_toml(ini_file: Path):
+ """Convert an INI file to a TOML file, saving the original to .ORIGINALNAME.bak"""
+
+ bak_path = ini_file.parent / f'.{ini_file.name}.bak'
+ original_ini = ini_file.read_text()
+ bak_path.write_text(original_ini)
+ new_toml = toml_util.convert(original_ini)
+ ini_file.write_text(new_toml)
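The per-source accessors (from_defaults/from_collection/from_environment) each re-instantiate the configset with a single source enabled, which is what get_SCOPE_CONFIG() relies on to merge sources in a fixed order. A rough sketch of the behavior, assuming the config spec is registered with abx.pm and using a hypothetical key:

    import os
    from abx_spec_config.base_configset import BaseConfigSet

    class ExampleConfig(BaseConfigSet):
        SAVE_TITLE: bool = True            # hypothetical key, schema default

    os.environ['SAVE_TITLE'] = 'False'
    cfg = ExampleConfig()
    cfg.from_defaults()      # {'SAVE_TITLE': True}   <- schema defaults only
    cfg.from_environment()   # {'SAVE_TITLE': False}  <- only keys set via env vars
    cfg.from_collection()    # {}                     <- only keys set in ./ArchiveBox.conf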
diff --git a/archivebox/abx/archivebox/toml_util.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/toml_util.py
similarity index 100%
rename from archivebox/abx/archivebox/toml_util.py
rename to archivebox/pkgs/abx-spec-config/abx_spec_config/toml_util.py
diff --git a/archivebox/pkgs/abx-spec-config/pyproject.toml b/archivebox/pkgs/abx-spec-config/pyproject.toml
new file mode 100644
index 00000000..aa2f6eb4
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-config/pyproject.toml
@@ -0,0 +1,20 @@
+[project]
+name = "abx-spec-config"
+version = "0.1.0"
+description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "python-benedict>=0.34.0",
+ "pydantic>=2.9.2",
+ "pydantic-settings>=2.6.0",
+ "rich>=13.9.3",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_config = "abx_spec_config"
diff --git a/archivebox/pkgs/abx-spec-django/README.md b/archivebox/pkgs/abx-spec-django/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-spec-django/abx_spec_django.py b/archivebox/pkgs/abx-spec-django/abx_spec_django.py
new file mode 100644
index 00000000..562dad72
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-django/abx_spec_django.py
@@ -0,0 +1,118 @@
+__order__ = 300
+
+import abx
+from typing import List, Dict, Any, cast
+
+###########################################################################################
+
+class DjangoPluginSpec:
+ @abx.hookspec
+ def get_INSTALLED_APPS() -> List[str]:
+ return ['abx_spec_django']
+
+ @abx.hookspec
+ def get_TEMPLATE_DIRS() -> List[str]:
+ return [] # e.g. ['your_plugin_type/plugin_name/templates']
+
+
+ @abx.hookspec
+ def get_STATICFILES_DIRS() -> List[str]:
+ return [] # e.g. ['your_plugin_type/plugin_name/static']
+
+ # @abx.hookspec
+ # def register_STATICFILES_DIRS(STATICFILES_DIRS):
+ # """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position"""
+ # # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static')
+ # pass
+
+
+ @abx.hookspec
+ def get_MIDDLEWARES() -> List[str]:
+ return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware']
+
+ # @abx.hookspec
+ # def register_MIDDLEWARE(MIDDLEWARE):
+ # """Mutate MIDDLEWARE in place to add your middleware in a specific position"""
+ # # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware')
+ # pass
+
+
+ @abx.hookspec
+ def get_AUTHENTICATION_BACKENDS() -> List[str]:
+ return [] # e.g. ['django_auth_ldap.backend.LDAPBackend']
+
+ # @abx.hookspec
+ # def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
+ # """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position"""
+ # # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend')
+ # pass
+
+ @abx.hookspec
+ def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME) -> Dict[str, Dict[str, Any]]:
+ return {} # e.g. {'some_queue_name': {'filename': 'some_queue_name.sqlite3', 'store_none': True, 'results': True, ...}}
+
+ # @abx.hookspec
+ # def register_DJANGO_HUEY(DJANGO_HUEY):
+ # """Mutate DJANGO_HUEY in place to add your huey queues in a specific position"""
+ # # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value'
+ # pass
+
+
+ @abx.hookspec
+ def get_ADMIN_DATA_VIEWS_URLS() -> List[str]:
+ return []
+
+ # @abx.hookspec
+ # def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
+ # """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position"""
+ # # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py')
+ # pass
+
+
+ # @abx.hookspec
+ # def register_settings(settings):
+ # """Mutate settings in place to add your settings / modify existing settings"""
+ # # settings.SOME_KEY = 'some_value'
+ # pass
+
+
+ ###########################################################################################
+
+ @abx.hookspec
+ def get_urlpatterns() -> List[str]:
+ return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)]
+
+ # @abx.hookspec
+ # def register_urlpatterns(urlpatterns):
+ # """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
+ # # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
+ # pass
+
+ ###########################################################################################
+
+
+
+ @abx.hookspec
+ def register_admin(admin_site) -> None:
+ """Register django admin views/models with the main django admin site instance"""
+ # e.g. admin_site.register(your_model, your_admin_class)
+ pass
+
+
+ ###########################################################################################
+
+
+ @abx.hookspec
+ def ready() -> None:
+ """Called when Django apps app.ready() are triggered"""
+ # e.g. abx.pm.hook.get_CONFIG().ytdlp.validate()
+ pass
+
+
+PLUGIN_SPEC = DjangoPluginSpec
+
+class ExpectedPluginSpec(DjangoPluginSpec):
+ pass
+
+TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
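Plugins contribute Django settings by implementing any of these hooks; since none of them are firstresult specs, each pm.hook.get_*() call returns one result per plugin, which the settings module can flatten with abx.as_list(). A minimal sketch with a hypothetical plugin app:

    import abx

    @abx.hookimpl
    def get_INSTALLED_APPS():
        return ['my_plugin']    # hypothetical app label

    @abx.hookimpl
    def ready():
        # runs once the Django app registry is fully loaded
        pass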
diff --git a/archivebox/pkgs/abx-spec-django/pyproject.toml b/archivebox/pkgs/abx-spec-django/pyproject.toml
new file mode 100644
index 00000000..09ed31ff
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-django/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-spec-django"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "django>=5.1.1,<6.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_django = "abx_spec_django"
diff --git a/archivebox/pkgs/abx-spec-extractor/README.md b/archivebox/pkgs/abx-spec-extractor/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py b/archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py
new file mode 100644
index 00000000..74659467
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py
@@ -0,0 +1,211 @@
+import os
+
+from typing import Optional, List, Annotated, Tuple
+from pathlib import Path
+
+from pydantic import AfterValidator
+from pydantic_pkgr import BinName
+
+
+import abx
+
+
+def assert_no_empty_args(args: List[str]) -> List[str]:
+ assert all(len(arg) for arg in args)
+ return args
+
+def assert_valid_extractor_name(s: str) -> str:
+ assert s.isidentifier()
+ return s
+
+def assert_handler_func_str(s: str) -> str:
+ assert s.startswith('self.')
+ return s
+
+ExtractorName = Annotated[str, AfterValidator(assert_valid_extractor_name)]
+
+HandlerFuncStr = Annotated[str, AfterValidator(assert_handler_func_str)]
+CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)]
+
+
+@abx.hookspec
+@abx.hookimpl
+def get_EXTRACTORS():
+ return []
+
+@abx.hookspec
+@abx.hookimpl
+def extract(uri: str, config: dict | None=None):
+ return {}
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl(trylast=True)
+def should_extract(uri: str, extractor: str, config: dict | None=None):
+ return False
+
+
+class BaseExtractor:
+ name: ExtractorName
+ binary: BinName
+
+ default_args: CmdArgsList = []
+ extra_args: CmdArgsList = []
+
+ def get_output_path(self, snapshot) -> Path:
+ return Path(self.__class__.__name__.lower())
+
+ def should_extract(self, uri: str, config: dict | None=None) -> bool:
+ try:
+ assert self.detect_installed_binary().version
+ except Exception:
+ # could not load binary
+ return False
+
+ # output_dir = self.get_output_path(snapshot)
+ # if output_dir.glob('*.*'):
+ # return False
+ return True
+
+ # @abx.hookimpl
+ # def extract(self, snapshot_id: str) -> Dict[str, Any]:
+ # from core.models import Snapshot
+ # from archivebox import CONSTANTS
+
+ # snapshot = Snapshot.objects.get(id=snapshot_id)
+
+ # if not self.should_extract(snapshot.url):
+ # return {}
+
+ # status = 'failed'
+ # start_ts = timezone.now()
+ # uplink = self.detect_network_interface()
+ # installed_binary = self.detect_installed_binary()
+ # machine = installed_binary.machine
+ # assert uplink.machine == installed_binary.machine # it would be *very* weird if this wasn't true
+
+ # output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid)
+ # output_dir.mkdir(parents=True, exist_ok=True)
+
+ # # execute the extractor binary with the given args
+ # args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
+ # cmd = [str(installed_binary.abspath), *args]
+ # proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir)
+
+ # # collect the output
+ # end_ts = timezone.now()
+ # output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*'))
+ # stdout = proc.stdout.strip()
+ # stderr = proc.stderr.strip()
+ # output_json = None
+ # output_text = stdout
+ # try:
+ # output_json = json.loads(stdout.strip())
+ # output_text = None
+ # except json.JSONDecodeError:
+ # pass
+
+ # errors = []
+ # if proc.returncode == 0:
+ # status = 'success'
+ # else:
+ # errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}')
+
+ # # increment health stats counters
+ # if status == 'success':
+ # machine.record_health_success()
+ # uplink.record_health_success()
+ # installed_binary.record_health_success()
+ # else:
+ # machine.record_health_failure()
+ # uplink.record_health_failure()
+ # installed_binary.record_health_failure()
+
+ # return {
+ # 'extractor': self.name,
+
+ # 'snapshot': {
+ # 'id': snapshot.id,
+ # 'abid': snapshot.abid,
+ # 'url': snapshot.url,
+ # 'created_by_id': snapshot.created_by_id,
+ # },
+
+ # 'machine': {
+ # 'id': machine.id,
+ # 'abid': machine.abid,
+ # 'guid': machine.guid,
+ # 'hostname': machine.hostname,
+ # 'hw_in_docker': machine.hw_in_docker,
+ # 'hw_in_vm': machine.hw_in_vm,
+ # 'hw_manufacturer': machine.hw_manufacturer,
+ # 'hw_product': machine.hw_product,
+ # 'hw_uuid': machine.hw_uuid,
+ # 'os_arch': machine.os_arch,
+ # 'os_family': machine.os_family,
+ # 'os_platform': machine.os_platform,
+ # 'os_release': machine.os_release,
+ # 'os_kernel': machine.os_kernel,
+ # },
+
+ # 'uplink': {
+ # 'id': uplink.id,
+ # 'abid': uplink.abid,
+ # 'mac_address': uplink.mac_address,
+ # 'ip_public': uplink.ip_public,
+ # 'ip_local': uplink.ip_local,
+ # 'dns_server': uplink.dns_server,
+ # 'hostname': uplink.hostname,
+ # 'iface': uplink.iface,
+ # 'isp': uplink.isp,
+ # 'city': uplink.city,
+ # 'region': uplink.region,
+ # 'country': uplink.country,
+ # },
+
+ # 'binary': {
+ # 'id': installed_binary.id,
+ # 'abid': installed_binary.abid,
+ # 'name': installed_binary.name,
+ # 'binprovider': installed_binary.binprovider,
+ # 'abspath': installed_binary.abspath,
+ # 'version': installed_binary.version,
+ # 'sha256': installed_binary.sha256,
+ # },
+
+ # 'cmd': cmd,
+ # 'stdout': stdout,
+ # 'stderr': stderr,
+ # 'returncode': proc.returncode,
+ # 'start_ts': start_ts,
+ # 'end_ts': end_ts,
+
+ # 'status': status,
+ # 'errors': errors,
+ # 'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
+ # 'output_files': output_files,
+ # 'output_json': output_json or {},
+ # 'output_text': output_text or '',
+ # }
+
+ # TODO: move this to a hookimpl
+ def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
+ cwd = cwd or Path(os.getcwd())
+ binary = self.load_binary(installed_binary=installed_binary)
+
+ return binary.exec(cmd=args, cwd=cwd)
+
+ # @cached_property
+ @property
+ def BINARY(self):
+ # import abx.archivebox.reads
+ # for binary in abx.archivebox.reads.get_BINARIES().values():
+ # if binary.name == self.binary:
+ # return binary
+ raise ValueError(f'Binary {self.binary} not found')
+
+ def detect_installed_binary(self):
+ from machine.models import InstalledBinary
+ # hydrates binary from DB/cache if record of installed version is recent enough
+ # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
+ return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)
+
+ def load_binary(self, installed_binary=None):
+ installed_binary = installed_binary or self.detect_installed_binary()
+ return installed_binary.load_from_db()
+
+ # def detect_network_interface(self):
+ # from machine.models import NetworkInterface
+ # return NetworkInterface.objects.current()
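Concrete extractors subclass BaseExtractor with a name and a binary name; binary detection, exec, and output paths come from the base class. A sketch with hypothetical values:

    from pathlib import Path

    class WgetExtractor(BaseExtractor):
        name: ExtractorName = 'wget'    # hypothetical extractor
        binary: BinName = 'wget'

        def get_output_path(self, snapshot) -> Path:
            return Path('wget')

    WGET_EXTRACTOR = WgetExtractor()
    # WGET_EXTRACTOR.should_extract(uri='https://example.com') returns True
    # only if an installed wget binary can be detected on the host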
diff --git a/archivebox/pkgs/abx-spec-extractor/pyproject.toml b/archivebox/pkgs/abx-spec-extractor/pyproject.toml
new file mode 100644
index 00000000..5d49fef2
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-extractor/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-spec-extractor"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "python-benedict>=0.26.0",
+ "pydantic>=2.5.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_extractor = "abx_spec_extractor"
diff --git a/archivebox/pkgs/abx-spec-pydantic-pkgr/README.md b/archivebox/pkgs/abx-spec-pydantic-pkgr/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py b/archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
new file mode 100644
index 00000000..b95b3f33
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py
@@ -0,0 +1,114 @@
+__order__ = 200
+
+import os
+
+from typing import Dict, cast
+from pathlib import Path
+
+from pydantic_pkgr import Binary, BinProvider
+
+import abx
+
+from abx_spec_config import ConfigPluginSpec
+
+###########################################################################################
+
+class PydanticPkgrPluginSpec:
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_LIB_DIR(self) -> Path:
+ """Get the directory where shared runtime libraries/dependencies should be installed"""
+ FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
+ LIB_DIR = Path(FLAT_CONFIG.get('LIB_DIR', '/usr/local/share/abx'))
+ return LIB_DIR
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_BIN_DIR(self) -> Path:
+ """Get the directory where binaries should be symlinked to"""
+ FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
+ LIB_DIR = pm.hook.get_LIB_DIR()
+ BIN_DIR = Path(FLAT_CONFIG.get('BIN_DIR') or LIB_DIR / 'bin')
+ return BIN_DIR
+
+ @abx.hookspec
+ @abx.hookimpl
+ def get_BINPROVIDERS(self) -> Dict[str, BinProvider]:
+ return {
+ # to be implemented by plugins, e.g.:
+ # 'npm': NpmBinProvider(npm_prefix=Path('/usr/local/share/abx/npm')),
+ }
+
+ @abx.hookspec
+ @abx.hookimpl
+ def get_BINARIES(self) -> Dict[str, Binary]:
+ return {
+ # to be implemented by plugins, e.g.:
+ # 'yt-dlp': Binary(name='yt-dlp', binproviders=[npm]),
+ }
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_BINPROVIDER(self, binprovider_name: str) -> BinProvider:
+ """Get a specific BinProvider by name"""
+ return abx.as_dict(pm.hook.get_BINPROVIDERS())[binprovider_name]
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def get_BINARY(self, bin_name: str) -> Binary:
+ """Get a specific Binary by name"""
+ return abx.as_dict(pm.hook.get_BINARIES())[bin_name]
+
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def binary_load(self, binary: Binary, **kwargs) -> Binary:
+ """Load a binary from the filesystem (override to load a binary from a different source, e.g. DB, cache, etc.)"""
+ loaded_binary = binary.load(**kwargs)
+ pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+ return loaded_binary
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def binary_install(self, binary: Binary, **kwargs) -> Binary:
+ """Override to change how a binary is installed (e.g. by downloading from a remote source, etc.)"""
+ loaded_binary = binary.install(**kwargs)
+ pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+ return loaded_binary
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def binary_load_or_install(self, binary: Binary, **kwargs) -> Binary:
+ """Override to change how a binary is loaded or installed (e.g. by downloading from a remote source, etc.)"""
+ loaded_binary = binary.load_or_install(**kwargs)
+ pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
+ return loaded_binary
+
+ @abx.hookspec(firstresult=True)
+ @abx.hookimpl
+ def binary_symlink_to_bin_dir(self, binary: Binary, bin_dir: Path | None=None):
+ if not (binary.abspath and os.path.isfile(binary.abspath)):
+ return
+
+ BIN_DIR = pm.hook.get_BIN_DIR()
+ try:
+ BIN_DIR.mkdir(parents=True, exist_ok=True)
+ symlink = BIN_DIR / binary.name
+ symlink.unlink(missing_ok=True)
+ symlink.symlink_to(binary.abspath)
+ symlink.chmod(0o777) # make sure it's executable by everyone
+ except Exception:
+ # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
+ # not actually needed, we can just run without it
+ pass
+
+
+PLUGIN_SPEC = PydanticPkgrPluginSpec
+
+
+class RequiredSpecsAvailable(ConfigPluginSpec, PydanticPkgrPluginSpec):
+ pass
+
+TypedPluginManager = abx.ABXPluginManager[RequiredSpecsAvailable]
+pm = cast(TypedPluginManager, abx.pm)
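Plugins advertise binaries via get_BINARIES(), and consumers resolve them through the plugin manager so that binary_load/binary_install overrides (and the BIN_DIR symlinking above) still apply. A sketch using pydantic_pkgr's PATH-based EnvProvider:

    import abx
    from pydantic_pkgr import Binary, EnvProvider

    @abx.hookimpl
    def get_BINARIES():
        return {'curl': Binary(name='curl', binproviders=[EnvProvider()])}

    # consumer side:
    # curl = pm.hook.get_BINARY(bin_name='curl')
    # curl = pm.hook.binary_load_or_install(binary=curl)
    # print(curl.abspath, curl.version)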
diff --git a/archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml b/archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml
new file mode 100644
index 00000000..67f1f62f
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-spec-pydantic-pkgr"
+version = "0.1.0"
+description = "The ABX plugin specification for Binaries and BinProviders"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "pydantic-pkgr>=0.5.4",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_pydantic_pkgr = "abx_spec_pydantic_pkgr"
diff --git a/archivebox/pkgs/abx-spec-searchbackend/README.md b/archivebox/pkgs/abx-spec-searchbackend/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py b/archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py
new file mode 100644
index 00000000..8bc53eb8
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py
@@ -0,0 +1,40 @@
+import abc
+from typing import Iterable, List, Dict, cast
+
+import abx
+from abx_spec_config import ConfigPluginSpec
+
+
+class BaseSearchBackend(abc.ABC):
+ name: str
+
+ @staticmethod
+ @abc.abstractmethod
+ def index(snapshot_id: str, texts: List[str]):
+ return
+
+ @staticmethod
+ @abc.abstractmethod
+ def flush(snapshot_ids: Iterable[str]):
+ return
+
+ @staticmethod
+ @abc.abstractmethod
+ def search(text: str) -> List[str]:
+ raise NotImplementedError("search method must be implemented by subclass")
+
+
+class SearchBackendPluginSpec:
+ @abx.hookspec
+ @abx.hookimpl
+ def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]:
+ return {}
+
+
+class ExpectedPluginSpec(SearchBackendPluginSpec, ConfigPluginSpec):
+ pass
+
+PLUGIN_SPEC = SearchBackendPluginSpec
+
+TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
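A search backend is any class implementing the three static methods above, exposed via a get_SEARCHBACKENDS() hookimpl. A toy in-memory backend as a sketch (hypothetical, for illustration only):

    import abx
    from typing import Iterable, List

    class MemorySearchBackend(BaseSearchBackend):
        name = 'memory'
        _index: dict = {}    # snapshot_id -> texts

        @staticmethod
        def index(snapshot_id: str, texts: List[str]):
            MemorySearchBackend._index[snapshot_id] = list(texts)

        @staticmethod
        def flush(snapshot_ids: Iterable[str]):
            for snapshot_id in snapshot_ids:
                MemorySearchBackend._index.pop(snapshot_id, None)

        @staticmethod
        def search(text: str) -> List[str]:
            return [sid for sid, texts in MemorySearchBackend._index.items()
                    if any(text in t for t in texts)]

    @abx.hookimpl
    def get_SEARCHBACKENDS():
        return {'memory': MemorySearchBackend()}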
diff --git a/archivebox/pkgs/abx-spec-searchbackend/pyproject.toml b/archivebox/pkgs/abx-spec-searchbackend/pyproject.toml
new file mode 100644
index 00000000..2a9ac3ce
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-searchbackend/pyproject.toml
@@ -0,0 +1,18 @@
+[project]
+name = "abx-spec-searchbackend"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "abx>=0.1.0",
+ "python-benedict>=0.26.0",
+ "pydantic>=2.5.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_searchbackend = "abx_spec_searchbackend"
diff --git a/archivebox/pkgs/abx/README.md b/archivebox/pkgs/abx/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx/abx.py b/archivebox/pkgs/abx/abx.py
new file mode 100644
index 00000000..de4f0046
--- /dev/null
+++ b/archivebox/pkgs/abx/abx.py
@@ -0,0 +1,484 @@
+__package__ = 'abx'
+__id__ = 'abx'
+__label__ = 'ABX'
+__author__ = 'Nick Sweeting'
+__homepage__ = 'https://github.com/ArchiveBox'
+__order__ = 0
+
+
+import inspect
+import importlib
+import itertools
+from pathlib import Path
+from typing import Dict, Callable, List, Set, Tuple, Iterable, Any, TypeVar, TypedDict, Type, cast, Generic, Mapping, overload, Final, ParamSpec, Literal, Protocol
+from types import ModuleType
+from typing_extensions import Annotated
+from functools import cache
+
+from benedict import benedict
+from pydantic import AfterValidator
+
+from pluggy import HookimplMarker, PluginManager, HookimplOpts, HookspecOpts, HookCaller
+
+
+
+ParamsT = ParamSpec("ParamsT")
+ReturnT = TypeVar('ReturnT')
+
+class HookSpecDecoratorThatReturnsFirstResult(Protocol):
+ def __call__(self, func: Callable[ParamsT, ReturnT]) -> Callable[ParamsT, ReturnT]: ...
+
+class HookSpecDecoratorThatReturnsListResults(Protocol):
+ def __call__(self, func: Callable[ParamsT, ReturnT]) -> Callable[ParamsT, List[ReturnT]]: ...
+
+
+class TypedHookspecMarker:
+ """
+ Improved version of pluggy.HookspecMarker that supports type inference of hookspecs with firstresult=True|False correctly
+ https://github.com/pytest-dev/pluggy/issues/191
+ """
+
+ __slots__ = ('project_name',)
+
+ def __init__(self, project_name: str) -> None:
+ self.project_name: Final[str] = project_name
+
+ # handle @hookspec(firstresult=False) -> List[ReturnT] (test_firstresult_False_hookspec)
+ @overload
+ def __call__(
+ self,
+ function: None = ...,
+ firstresult: Literal[False] = ...,
+ historic: bool = ...,
+ warn_on_impl: Warning | None = ...,
+ warn_on_impl_args: Mapping[str, Warning] | None = ...,
+ ) -> HookSpecDecoratorThatReturnsListResults: ...
+
+ # handle @hookspec(firstresult=True) -> ReturnT (test_firstresult_True_hookspec)
+ @overload
+ def __call__(
+ self,
+ function: None = ...,
+ firstresult: Literal[True] = ...,
+ historic: bool = ...,
+ warn_on_impl: Warning | None = ...,
+ warn_on_impl_args: Mapping[str, Warning] | None = ...,
+ ) -> HookSpecDecoratorThatReturnsFirstResult: ...
+
+ # handle @hookspec -> List[ReturnT] (test_normal_hookspec)
+ # order matters!!! this one has to come last
+ @overload
+ def __call__(
+ self,
+ function: Callable[ParamsT, ReturnT] = ...,
+ firstresult: Literal[False] = ...,
+ historic: bool = ...,
+ warn_on_impl: None = ...,
+ warn_on_impl_args: None = ...,
+ ) -> Callable[ParamsT, List[ReturnT]]: ...
+
+ def __call__(
+ self,
+ function: Callable[ParamsT, ReturnT] | None = None,
+ firstresult: bool = False,
+ historic: bool = False,
+ warn_on_impl: Warning | None = None,
+ warn_on_impl_args: Mapping[str, Warning] | None = None,
+ ) -> Callable[ParamsT, List[ReturnT]] | HookSpecDecoratorThatReturnsListResults | HookSpecDecoratorThatReturnsFirstResult:
+
+ def setattr_hookspec_opts(func) -> Callable:
+ if historic and firstresult:
+ raise ValueError("cannot have a historic firstresult hook")
+ opts: HookspecOpts = {
+ "firstresult": firstresult,
+ "historic": historic,
+ "warn_on_impl": warn_on_impl,
+ "warn_on_impl_args": warn_on_impl_args,
+ }
+ setattr(func, self.project_name + "_spec", opts)
+ return func
+
+ if function is not None:
+ return setattr_hookspec_opts(function)
+ else:
+ return setattr_hookspec_opts
+
+
+
+
+spec = hookspec = TypedHookspecMarker("abx")
+impl = hookimpl = HookimplMarker("abx")
+
+
+def is_valid_attr_name(x: str) -> str:
+ assert x.isidentifier() and not x.startswith('_')
+ return x
+
+def is_valid_module_name(x: str) -> str:
+ assert x.isidentifier() and not x.startswith('_') and x.islower()
+ return x
+
+AttrName = Annotated[str, AfterValidator(is_valid_attr_name)]
+PluginId = Annotated[str, AfterValidator(is_valid_module_name)]
+
+
+class PluginInfo(TypedDict, total=True):
+ id: PluginId
+ package: AttrName
+ label: str
+ version: str
+ author: str
+ homepage: str
+ dependencies: List[str]
+
+ source_code: str
+ hooks: Dict[AttrName, Callable]
+ module: ModuleType
+
+
+
+PluginSpec = TypeVar("PluginSpec")
+
+class ABXPluginManager(PluginManager, Generic[PluginSpec]):
+ """
+ Patch to fix pluggy's PluginManager to work with pydantic models.
+ See: https://github.com/pytest-dev/pluggy/pull/536
+ """
+
+ # enable static type checking of pm.hook.call() calls
+ # https://stackoverflow.com/a/62871889/2156113
+ # https://github.com/pytest-dev/pluggy/issues/191
+ hook: PluginSpec
+
+ def create_typed_hookcaller(self, name: str, module_or_class: Type[PluginSpec], spec_opts: HookspecOpts) -> HookCaller:
+ """
+ create a new HookCaller subclass with a modified __signature__
+ so that the return type is correct and args are converted to kwargs
+ """
+ TypedHookCaller = type('TypedHookCaller', (HookCaller,), {})
+
+ hookspec_signature = inspect.signature(getattr(module_or_class, name))
+ hookspec_return_type = hookspec_signature.return_annotation
+
+ # replace return type with list if firstresult=False
+ hookcall_return_type = hookspec_return_type if spec_opts['firstresult'] else List[hookspec_return_type]
+
+ # replace each arg with kwarg equivalent (pm.hook.call() only accepts kwargs)
+ args_as_kwargs = [
+ param.replace(kind=inspect.Parameter.KEYWORD_ONLY) if param.name != 'self' else param
+ for param in hookspec_signature.parameters.values()
+ ]
+ TypedHookCaller.__signature__ = hookspec_signature.replace(parameters=args_as_kwargs, return_annotation=hookcall_return_type)
+ TypedHookCaller.__name__ = f'{name}_HookCaller'
+
+ return TypedHookCaller(name, self._hookexec, module_or_class, spec_opts)
+
+ def add_hookspecs(self, module_or_class: Type[PluginSpec]) -> None:
+ """Add HookSpecs from the given class, (generic type allows us to enforce types of pm.hook.call() statically)"""
+ names = []
+ for name in dir(module_or_class):
+ spec_opts = self.parse_hookspec_opts(module_or_class, name)
+ if spec_opts is not None:
+ hc: HookCaller | None = getattr(self.hook, name, None)
+ if hc is None:
+ hc = self.create_typed_hookcaller(name, module_or_class, spec_opts)
+ setattr(self.hook, name, hc)
+ else:
+ # Plugins registered this hook without knowing the spec.
+ hc.set_specification(module_or_class, spec_opts)
+ for hookfunction in hc.get_hookimpls():
+ self._verify_hook(hc, hookfunction)
+ names.append(name)
+
+ if not names:
+ raise ValueError(
+ f"did not find any {self.project_name!r} hooks in {module_or_class!r}"
+ )
+
+ def parse_hookimpl_opts(self, plugin, name: str) -> HookimplOpts | None:
+ # IMPORTANT: @property methods can have side effects, and are never hookimpl
+ # if attr is a property, skip it in advance
+ # plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
+ if isinstance(getattr(plugin, name, None), property):
+ return None
+
+ try:
+ return super().parse_hookimpl_opts(plugin, name)
+ except AttributeError:
+ return None
+
+
+pm = ABXPluginManager("abx")
+
+
+
+def get_plugin_order(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int, Path]:
+ assert plugin
+ plugin_module = None
+ plugin_dir = None
+
+ if isinstance(plugin, str) or isinstance(plugin, Path):
+ if str(plugin).endswith('.py'):
+ plugin_dir = Path(plugin).parent
+ elif '/' in str(plugin):
+ # assume it's a path to a plugin directory
+ plugin_dir = Path(plugin)
+ elif str(plugin).isidentifier():
+ # it's a bare plugin id / module name, import it to find its plugin dir
+ plugin_module = importlib.import_module(str(plugin))
+ plugin_dir = Path(str(plugin_module.__file__)).parent
+
+ elif inspect.ismodule(plugin):
+ plugin_module = plugin
+ plugin_dir = Path(str(plugin_module.__file__)).parent
+ elif inspect.isclass(plugin):
+ plugin_module = plugin
+ plugin_dir = Path(inspect.getfile(plugin)).parent
+ else:
+ raise ValueError(f'Invalid plugin, cannot get order: {plugin}')
+
+ if plugin_dir:
+ try:
+ # if .plugin_order file exists, use it to set the load priority
+ order = int((plugin_dir / '.plugin_order').read_text())
+ assert -1000000 < order < 100000000
+ return (order, plugin_dir)
+ except FileNotFoundError:
+ pass
+
+ if plugin_module:
+ order = getattr(plugin_module, '__order__', 999)
+ else:
+ order = 999
+
+ assert order is not None
+ assert plugin_dir
+
+ return (order, plugin_dir)
+
+
+# @cache
+def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo:
+ assert plugin
+
+ # import the plugin module by its name
+ if isinstance(plugin, str):
+ module = importlib.import_module(plugin)
+ # print('IMPORTED PLUGIN:', plugin)
+ plugin = getattr(module, 'PLUGIN_SPEC', getattr(module, 'PLUGIN', module))
+ elif inspect.ismodule(plugin):
+ module = plugin
+ plugin = getattr(module, 'PLUGIN_SPEC', getattr(module, 'PLUGIN', module))
+ elif inspect.isclass(plugin):
+ module = inspect.getmodule(plugin)
+ else:
+ raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
+
+ assert module
+
+ plugin_file = Path(inspect.getfile(module))
+ plugin_package = module.__package__ or module.__name__
+ plugin_id = plugin_package.replace('.', '_')
+
+ # load the plugin info from the plugin/__init__.py __attr__s if they exist
+ plugin_module_attrs = {
+ 'label': getattr(module, '__label__', plugin_id),
+ 'version': getattr(module, '__version__', '0.0.1'),
+ 'author': getattr(module, '__author__', 'ArchiveBox'),
+ 'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox'),
+ 'dependencies': getattr(module, '__dependencies__', []),
+ }
+
+ # load the plugin info from the plugin/pyproject.toml file if it has one
+ plugin_toml_info = {}
+ try:
+ # try loading ./pyproject.toml first in case the plugin is a bare python file not inside a package dir
+ plugin_toml_info = benedict.from_toml((plugin_file.parent / 'pyproject.toml').read_text()).project
+ except Exception:
+ try:
+ # try loading ../pyproject.toml next in case the plugin is in a package dir
+ plugin_toml_info = benedict.from_toml((plugin_file.parent.parent / 'pyproject.toml').read_text()).project
+ except Exception:
+ # print('WARNING: could not detect pyproject.toml for PLUGIN:', plugin_id, plugin_file.parent, 'ERROR:', e)
+ pass
+
+
+ assert plugin_id
+ assert plugin_package
+ assert module.__file__
+
+ # merge the plugin info from all sources + add dynamically calculated info
+ return cast(PluginInfo, benedict(PluginInfo(**{
+ 'id': plugin_id,
+ **plugin_module_attrs,
+ **plugin_toml_info,
+ 'package': plugin_package,
+ 'source_code': module.__file__,
+ 'order': get_plugin_order(plugin),
+ 'hooks': get_plugin_hooks(plugin),
+ 'module': module,
+ 'plugin': plugin,
+ })))
+
+
+def get_all_plugins() -> Dict[PluginId, PluginInfo]:
+ """Get the metadata for all the plugins registered with Pluggy."""
+ plugins = {}
+ for plugin_module in pm.get_plugins():
+ plugin_info = get_plugin(plugin=plugin_module)
+ assert 'id' in plugin_info
+ plugins[plugin_info['id']] = plugin_info
+ return benedict(plugins)
+
+
+def get_all_hook_names() -> Set[str]:
+ """Get a set of all hook names across all plugins"""
+ return {
+ hook_name
+ for plugin_module in pm.get_plugins()
+ for hook_name in get_plugin_hooks(plugin_module)
+ }
+
+
+def get_all_hook_specs() -> Dict[str, Dict[str, Any]]:
+ """Get a set of all hookspec methods defined in all plugins (useful for type checking if a pm.hook.call() is valid)"""
+ hook_specs = {}
+
+ for hook_name in get_all_hook_names():
+ for plugin_module in pm.get_plugins():
+ if hasattr(plugin_module, hook_name):
+ hookspecopts = pm.parse_hookspec_opts(plugin_module, hook_name)
+ if hookspecopts:
+ method = getattr(plugin_module, hook_name)
+ signature = inspect.signature(method)
+ return_type = signature.return_annotation if signature.return_annotation != inspect._empty else None
+
+ if not hookspecopts.get('firstresult'):
+ # if not firstresult, pm.hook.call() returns a list of results
+ return_type = List[return_type]
+
+ call_signature = signature.replace(return_annotation=return_type)
+ # bind hook_name as a default arg so each wrapper calls its own hook, not the last loop value
+ method = lambda *args, _hook_name=hook_name, **kwargs: getattr(pm.hook, _hook_name)(*args, **kwargs)
+ method.__signature__ = call_signature
+ method.__name__ = hook_name
+ method.__package__ = plugin_module.__package__
+
+ hook_specs[hook_name] = {
+ 'name': hook_name,
+ 'method': method,
+ 'signature': call_signature,
+ 'hookspec_opts': hookspecopts,
+ 'hookspec_signature': signature,
+ 'hookspec_plugin': plugin_module.__package__,
+ }
+ return hook_specs
+
+
+
+###### PLUGIN DISCOVERY AND LOADING ########################################################
+
+
+def find_plugins_in_dir(plugins_dir: Path) -> Dict[PluginId, Path]:
+ """
+ Find all the plugins in a given directory. Just looks for an __init__.py file.
+ """
+ python_dirs = plugins_dir.glob("*/__init__.py")
+ sorted_python_dirs = sorted(python_dirs, key=lambda p: get_plugin_order(plugin=p))
+
+ return {
+ plugin_entrypoint.parent.name: plugin_entrypoint.parent
+ for plugin_entrypoint in sorted_python_dirs
+ if plugin_entrypoint.parent.name not in ('abx', 'core')
+ }
+
+
+def get_pip_installed_plugins(group: PluginId='abx') -> Dict[PluginId, Path]:
+ """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip"""
+ import importlib.metadata
+
+ DETECTED_PLUGINS = {} # module_name: module_dir_path
+ for dist in list(importlib.metadata.distributions()):
+ for entrypoint in dist.entry_points:
+ if entrypoint.group != group or pm.is_blocked(entrypoint.name):
+ continue
+ DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent
+ # pm.register(plugin, name=ep.name)
+ # pm._plugin_distinfo.append((plugin, DistFacade(dist)))
+ return DETECTED_PLUGINS
+
+
+
+# Load all plugins from pip packages, archivebox built-ins, and user plugins
+def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId, Path]):
+ """
+ Load plugins from an iterable of plugin ids/modules/classes, or a dict of {plugin_id: dir_path}.
+ """
+ PLUGINS_TO_LOAD = []
+ LOADED_PLUGINS = {}
+
+ for plugin in plugins:
+ plugin_info = get_plugin(plugin)
+ assert plugin_info, f'No plugin metadata found for {plugin}'
+ assert 'id' in plugin_info and 'module' in plugin_info
+ if plugin_info['module'] in pm.get_plugins():
+ LOADED_PLUGINS[plugin_info['id']] = plugin_info
+ continue
+ PLUGINS_TO_LOAD.append(plugin_info)
+
+ PLUGINS_TO_LOAD = sorted(PLUGINS_TO_LOAD, key=lambda x: x['order'])
+
+ for plugin_info in PLUGINS_TO_LOAD:
+ pm.register(plugin_info['module'])
+ LOADED_PLUGINS[plugin_info['id']] = plugin_info
+ # print(f' √ Loaded plugin: {plugin_id}')
+ return benedict(LOADED_PLUGINS)
+
+@cache
+def get_plugin_hooks(plugin: PluginId | ModuleType | Type | None) -> Dict[AttrName, Callable]:
+ """Get all the functions marked with @hookimpl on a module."""
+ if not plugin:
+ return {}
+
+ hooks = {}
+
+ if isinstance(plugin, str):
+ plugin_module = importlib.import_module(plugin)
+ elif inspect.ismodule(plugin) or inspect.isclass(plugin):
+ plugin_module = plugin
+ else:
+ raise ValueError(f'Invalid plugin, cannot get hooks: {plugin}')
+
+ for attr_name in dir(plugin_module):
+ if attr_name.startswith('_'):
+ continue
+ try:
+ attr = getattr(plugin_module, attr_name)
+ if isinstance(attr, Callable):
+ if pm.parse_hookimpl_opts(plugin_module, attr_name):
+ hooks[attr_name] = attr
+ except Exception as e:
+ print(f'Error getting hookimpls for {plugin}: {e}')
+
+ return hooks
+
+
+def as_list(results: List[List[ReturnT]]) -> List[ReturnT]:
+ """Flatten a list of lists returned by a pm.hook.call() into a single list"""
+ return list(itertools.chain(*results))
+
+
+def as_dict(results: List[Dict[PluginId, ReturnT]]) -> Dict[PluginId, ReturnT]:
+ """Flatten a list of dicts returned by a pm.hook.call() into a single dict"""
+
+ if isinstance(results, (dict, benedict)):
+ results_list = results.values()
+ else:
+ results_list = results
+
+ return benedict({
+ result_id: result
+ for plugin_results in results_list
+ for result_id, result in plugin_results.items()
+ })
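The net effect of TypedHookspecMarker + ABXPluginManager is that the static type of pm.hook.some_hook() matches pluggy's runtime behavior: firstresult=True specs type as a single value, plain specs as a list, which as_list()/as_dict() then flatten. A sketch with a hypothetical spec:

    import abx

    class ExampleSpec:
        @abx.hookspec
        def get_ITEMS(self) -> dict: ...    # pm.hook.get_ITEMS() -> List[dict]

        @abx.hookspec(firstresult=True)
        def get_NAME(self) -> str: ...      # pm.hook.get_NAME() -> str

    abx.pm.add_hookspecs(ExampleSpec)
    # after plugins register hookimpls:
    # ITEMS = abx.as_dict(abx.pm.hook.get_ITEMS())   # merged {id: item} across plugins
    # NAME = abx.pm.hook.get_NAME()                  # first non-None result wins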
diff --git a/archivebox/pkgs/abx/pyproject.toml b/archivebox/pkgs/abx/pyproject.toml
new file mode 100644
index 00000000..3c185653
--- /dev/null
+++ b/archivebox/pkgs/abx/pyproject.toml
@@ -0,0 +1,14 @@
+[project]
+name = "abx"
+version = "0.1.0"
+description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem."
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+ "pluggy>=1.5.0",
+ "django>=5.1.1,<6.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
diff --git a/archivebox/plugins_extractor/archivedotorg/__init__.py b/archivebox/plugins_extractor/archivedotorg/__init__.py
deleted file mode 100644
index a5c24932..00000000
--- a/archivebox/plugins_extractor/archivedotorg/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-__package__ = 'plugins_extractor.archivedotorg'
-__label__ = 'archivedotorg'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://archive.org'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'archivedotorg': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import ARCHIVEDOTORG_CONFIG
-
- return {
- 'archivedotorg': ARCHIVEDOTORG_CONFIG
- }
-
-
-# @abx.hookimpl
-# def get_EXTRACTORS():
-# from .extractors import ARCHIVEDOTORG_EXTRACTOR
-#
-# return {
-# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR,
-# }
diff --git a/archivebox/plugins_extractor/chrome/__init__.py b/archivebox/plugins_extractor/chrome/__init__.py
deleted file mode 100644
index 016cd292..00000000
--- a/archivebox/plugins_extractor/chrome/__init__.py
+++ /dev/null
@@ -1,54 +0,0 @@
-__package__ = 'plugins_extractor.chrome'
-__id__ = 'chrome'
-__label__ = 'Chrome'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/main/archivebox/plugins_extractor/chrome'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import CHROME_CONFIG
-
- return {
- __id__: CHROME_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import CHROME_BINARY
-
- return {
- 'chrome': CHROME_BINARY,
- }
-
-@abx.hookimpl
-def ready():
- from .config import CHROME_CONFIG
- CHROME_CONFIG.validate()
-
-
-# @abx.hookimpl
-# def get_EXTRACTORS():
-# return {
-# 'pdf': PDF_EXTRACTOR,
-# 'screenshot': SCREENSHOT_EXTRACTOR,
-# 'dom': DOM_EXTRACTOR,
-# }
diff --git a/archivebox/plugins_extractor/curl/__init__.py b/archivebox/plugins_extractor/curl/__init__.py
deleted file mode 100644
index 99af0107..00000000
--- a/archivebox/plugins_extractor/curl/__init__.py
+++ /dev/null
@@ -1,38 +0,0 @@
-__package__ = 'plugins_extractor.curl'
-__label__ = 'curl'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/curl/curl'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'curl': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import CURL_CONFIG
-
- return {
- 'curl': CURL_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import CURL_BINARY
-
- return {
- 'curl': CURL_BINARY,
- }
diff --git a/archivebox/plugins_extractor/favicon/__init__.py b/archivebox/plugins_extractor/favicon/__init__.py
deleted file mode 100644
index 3fa84560..00000000
--- a/archivebox/plugins_extractor/favicon/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-__package__ = 'plugins_extractor.favicon'
-__label__ = 'favicon'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/archivebox'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'favicon': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import FAVICON_CONFIG
-
- return {
- 'favicon': FAVICON_CONFIG
- }
-
-
-# @abx.hookimpl
-# def get_EXTRACTORS():
-# from .extractors import FAVICON_EXTRACTOR
-
-# return {
-# 'favicon': FAVICON_EXTRACTOR,
-# }
diff --git a/archivebox/plugins_extractor/git/__init__.py b/archivebox/plugins_extractor/git/__init__.py
deleted file mode 100644
index db18919f..00000000
--- a/archivebox/plugins_extractor/git/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-__package__ = 'plugins_extractor.git'
-__label__ = 'git'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/git/git'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'git': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import GIT_CONFIG
-
- return {
- 'git': GIT_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import GIT_BINARY
-
- return {
- 'git': GIT_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import GIT_EXTRACTOR
-
- return {
- 'git': GIT_EXTRACTOR,
- }
diff --git a/archivebox/plugins_extractor/git/extractors.py b/archivebox/plugins_extractor/git/extractors.py
deleted file mode 100644
index 350f1b82..00000000
--- a/archivebox/plugins_extractor/git/extractors.py
+++ /dev/null
@@ -1,17 +0,0 @@
-__package__ = 'plugins_extractor.git'
-
-from pathlib import Path
-
-from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
-
-from .binaries import GIT_BINARY
-
-
-class GitExtractor(BaseExtractor):
- name: ExtractorName = 'git'
- binary: str = GIT_BINARY.name
-
- def get_output_path(self, snapshot) -> Path | None:
- return snapshot.as_link() / 'git'
-
-GIT_EXTRACTOR = GitExtractor()
diff --git a/archivebox/plugins_extractor/htmltotext/__init__.py b/archivebox/plugins_extractor/htmltotext/__init__.py
deleted file mode 100644
index 0f2b756c..00000000
--- a/archivebox/plugins_extractor/htmltotext/__init__.py
+++ /dev/null
@@ -1,41 +0,0 @@
-__package__ = 'plugins_extractor.htmltotext'
-__id__ = 'htmltotext'
-__label__ = 'HTML-to-Text'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/archivebox'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import HTMLTOTEXT_CONFIG
-
- return {
- __id__: HTMLTOTEXT_CONFIG
- }
-
-
-# @abx.hookimpl
-# def get_EXTRACTORS():
-# from .extractors import FAVICON_EXTRACTOR
-
-# return {
-# 'htmltotext': FAVICON_EXTRACTOR,
-# }
diff --git a/archivebox/plugins_extractor/mercury/__init__.py b/archivebox/plugins_extractor/mercury/__init__.py
deleted file mode 100644
index 10aca671..00000000
--- a/archivebox/plugins_extractor/mercury/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-__package__ = 'plugins_extractor.mercury'
-__label__ = 'mercury'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/postlight/mercury-parser'
-__dependencies__ = ['npm']
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'mercury': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import MERCURY_CONFIG
-
- return {
- 'mercury': MERCURY_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import MERCURY_BINARY
-
- return {
- 'mercury': MERCURY_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import MERCURY_EXTRACTOR
-
- return {
- 'mercury': MERCURY_EXTRACTOR,
- }
diff --git a/archivebox/plugins_extractor/mercury/extractors.py b/archivebox/plugins_extractor/mercury/extractors.py
deleted file mode 100644
index 5d91b0e0..00000000
--- a/archivebox/plugins_extractor/mercury/extractors.py
+++ /dev/null
@@ -1,19 +0,0 @@
-__package__ = 'plugins_extractor.mercury'
-
-from pathlib import Path
-
-from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
-
-from .binaries import MERCURY_BINARY
-
-
-
-class MercuryExtractor(BaseExtractor):
- name: ExtractorName = 'mercury'
- binary: str = MERCURY_BINARY.name
-
- def get_output_path(self, snapshot) -> Path | None:
- return snapshot.link_dir / 'mercury' / 'content.html'
-
-
-MERCURY_EXTRACTOR = MercuryExtractor()
diff --git a/archivebox/plugins_extractor/pocket/__init__.py b/archivebox/plugins_extractor/pocket/__init__.py
deleted file mode 100644
index bf09435f..00000000
--- a/archivebox/plugins_extractor/pocket/__init__.py
+++ /dev/null
@@ -1,37 +0,0 @@
-__package__ = 'plugins_extractor.pocket'
-__id__ = 'pocket'
-__label__ = 'pocket'
-__version__ = '2024.10.21'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/pocket'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import POCKET_CONFIG
-
- return {
- __id__: POCKET_CONFIG
- }
-
-@abx.hookimpl
-def ready():
- from .config import POCKET_CONFIG
- POCKET_CONFIG.validate()
diff --git a/archivebox/plugins_extractor/readability/__init__.py b/archivebox/plugins_extractor/readability/__init__.py
deleted file mode 100644
index 2ef1a1a8..00000000
--- a/archivebox/plugins_extractor/readability/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-__package__ = 'plugins_extractor.readability'
-__label__ = 'readability'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/readability-extractor'
-__dependencies__ = ['npm']
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'readability': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import READABILITY_CONFIG
-
- return {
- 'readability': READABILITY_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import READABILITY_BINARY
-
- return {
- 'readability': READABILITY_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import READABILITY_EXTRACTOR
-
- return {
- 'readability': READABILITY_EXTRACTOR,
- }
diff --git a/archivebox/plugins_extractor/readability/extractors.py b/archivebox/plugins_extractor/readability/extractors.py
deleted file mode 100644
index eb8ea165..00000000
--- a/archivebox/plugins_extractor/readability/extractors.py
+++ /dev/null
@@ -1,20 +0,0 @@
-__package__ = 'plugins_extractor.readability'
-
-from pathlib import Path
-
-from pydantic_pkgr import BinName
-
-from abx.archivebox.base_extractor import BaseExtractor
-
-from .binaries import READABILITY_BINARY
-
-
-class ReadabilityExtractor(BaseExtractor):
- name: str = 'readability'
- binary: BinName = READABILITY_BINARY.name
-
- def get_output_path(self, snapshot) -> Path:
- return Path(snapshot.link_dir) / 'readability' / 'content.html'
-
-
-READABILITY_EXTRACTOR = ReadabilityExtractor()
diff --git a/archivebox/plugins_extractor/readwise/__init__.py b/archivebox/plugins_extractor/readwise/__init__.py
deleted file mode 100644
index 002eb58b..00000000
--- a/archivebox/plugins_extractor/readwise/__init__.py
+++ /dev/null
@@ -1,37 +0,0 @@
-__package__ = 'plugins_extractor.readwise'
-__id__ = 'readwise'
-__label__ = 'readwise'
-__version__ = '2024.10.21'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import READWISE_CONFIG
-
- return {
- __id__: READWISE_CONFIG
- }
-
-@abx.hookimpl
-def ready():
- from .config import READWISE_CONFIG
- READWISE_CONFIG.validate()
diff --git a/archivebox/plugins_extractor/readwise/config.py b/archivebox/plugins_extractor/readwise/config.py
deleted file mode 100644
index 106aaf06..00000000
--- a/archivebox/plugins_extractor/readwise/config.py
+++ /dev/null
@@ -1,17 +0,0 @@
-__package__ = 'plugins_extractor.readwise'
-
-from typing import Dict
-from pathlib import Path
-
-from pydantic import Field
-
-from abx.archivebox.base_configset import BaseConfigSet
-
-from archivebox.config import CONSTANTS
-
-
-class ReadwiseConfig(BaseConfigSet):
- READWISE_DB_PATH: Path = Field(default=CONSTANTS.SOURCES_DIR / "readwise_reader_api.db")
- READWISE_READER_TOKENS: Dict[str, str] = Field(default=lambda: {}) # {<username>: <access_token>, ...}
-
-READWISE_CONFIG = ReadwiseConfig()
diff --git a/archivebox/plugins_extractor/singlefile/__init__.py b/archivebox/plugins_extractor/singlefile/__init__.py
deleted file mode 100644
index cd72adb8..00000000
--- a/archivebox/plugins_extractor/singlefile/__init__.py
+++ /dev/null
@@ -1,51 +0,0 @@
-__package__ = 'plugins_extractor.singlefile'
-__label__ = 'singlefile'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/gildas-lormeau/singlefile'
-__dependencies__ = ['npm']
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'singlefile': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import SINGLEFILE_CONFIG
-
- return {
- 'singlefile': SINGLEFILE_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import SINGLEFILE_BINARY
-
- return {
- 'singlefile': SINGLEFILE_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import SINGLEFILE_EXTRACTOR
-
- return {
- 'singlefile': SINGLEFILE_EXTRACTOR,
- }
-
-# @abx.hookimpl
-# def get_INSTALLED_APPS():
-# # needed to load ./models.py
-# return [__package__]
diff --git a/archivebox/plugins_extractor/singlefile/extractors.py b/archivebox/plugins_extractor/singlefile/extractors.py
deleted file mode 100644
index fedbe801..00000000
--- a/archivebox/plugins_extractor/singlefile/extractors.py
+++ /dev/null
@@ -1,19 +0,0 @@
-__package__ = 'plugins_extractor.singlefile'
-
-from pathlib import Path
-
-from pydantic_pkgr import BinName
-from abx.archivebox.base_extractor import BaseExtractor
-
-from .binaries import SINGLEFILE_BINARY
-
-
-class SinglefileExtractor(BaseExtractor):
- name: str = 'singlefile'
- binary: BinName = SINGLEFILE_BINARY.name
-
- def get_output_path(self, snapshot) -> Path:
- return Path(snapshot.link_dir) / 'singlefile.html'
-
-
-SINGLEFILE_EXTRACTOR = SinglefileExtractor()
diff --git a/archivebox/plugins_extractor/wget/__init__.py b/archivebox/plugins_extractor/wget/__init__.py
deleted file mode 100644
index e2a36aa4..00000000
--- a/archivebox/plugins_extractor/wget/__init__.py
+++ /dev/null
@@ -1,54 +0,0 @@
-__package__ = 'plugins_extractor.wget'
-__id__ = 'wget'
-__label__ = 'WGET'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/wget'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- __id__: {
- 'id': __id__,
- 'package': __package__,
- 'label': __label__,
- 'version': __version__,
- 'author': __author__,
- 'homepage': __homepage__,
- 'dependencies': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import WGET_CONFIG
-
- return {
- __id__: WGET_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import WGET_BINARY
-
- return {
- 'wget': WGET_BINARY,
- }
-
-@abx.hookimpl
-def get_EXTRACTORS():
- from .extractors import WGET_EXTRACTOR, WARC_EXTRACTOR
-
- return {
- 'wget': WGET_EXTRACTOR,
- 'warc': WARC_EXTRACTOR,
- }
-
-@abx.hookimpl
-def ready():
- from .config import WGET_CONFIG
- WGET_CONFIG.validate()
diff --git a/archivebox/plugins_extractor/wget/extractors.py b/archivebox/plugins_extractor/wget/extractors.py
deleted file mode 100644
index 86fa3923..00000000
--- a/archivebox/plugins_extractor/wget/extractors.py
+++ /dev/null
@@ -1,37 +0,0 @@
-__package__ = 'plugins_extractor.wget'
-
-from pathlib import Path
-
-from pydantic_pkgr import BinName
-
-from abx.archivebox.base_extractor import BaseExtractor, ExtractorName
-
-from .binaries import WGET_BINARY
-from .wget_util import wget_output_path
-
-class WgetExtractor(BaseExtractor):
- name: ExtractorName = 'wget'
- binary: BinName = WGET_BINARY.name
-
- def get_output_path(self, snapshot) -> Path | None:
- wget_index_path = wget_output_path(snapshot.as_link())
- if wget_index_path:
- return Path(wget_index_path)
- return None
-
-WGET_EXTRACTOR = WgetExtractor()
-
-
-class WarcExtractor(BaseExtractor):
- name: ExtractorName = 'warc'
- binary: BinName = WGET_BINARY.name
-
- def get_output_path(self, snapshot) -> Path | None:
- warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
- if warc_files:
- return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
- return None
-
-
-WARC_EXTRACTOR = WarcExtractor()
-
diff --git a/archivebox/plugins_pkg/npm/binproviders.py b/archivebox/plugins_pkg/npm/binproviders.py
deleted file mode 100644
index b1b83168..00000000
--- a/archivebox/plugins_pkg/npm/binproviders.py
+++ /dev/null
@@ -1,42 +0,0 @@
-__package__ = 'plugins_pkg.npm'
-
-from pathlib import Path
-from typing import Optional
-
-from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
-
-from archivebox.config import DATA_DIR, CONSTANTS
-
-from abx.archivebox.base_binary import BaseBinProvider
-
-
-
-OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
-NEW_NODE_BIN_PATH = CONSTANTS.DEFAULT_LIB_DIR / 'npm' / 'node_modules' / '.bin'
-
-
-class SystemNpmBinProvider(NpmProvider, BaseBinProvider):
- name: BinProviderName = "sys_npm"
-
- npm_prefix: Optional[Path] = None
-
-
-class LibNpmBinProvider(NpmProvider, BaseBinProvider):
- name: BinProviderName = "lib_npm"
- PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
-
- npm_prefix: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'npm'
-
- def setup(self) -> None:
- # update paths from config if they arent the default
- from archivebox.config.common import STORAGE_CONFIG
- if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
- self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm'
- self.PATH = f'{STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
-
- super().setup()
-
-
-SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
-LIB_NPM_BINPROVIDER = LibNpmBinProvider()
-npm = LIB_NPM_BINPROVIDER
diff --git a/archivebox/plugins_pkg/puppeteer/__init__.py b/archivebox/plugins_pkg/puppeteer/__init__.py
deleted file mode 100644
index 7acc5b1b..00000000
--- a/archivebox/plugins_pkg/puppeteer/__init__.py
+++ /dev/null
@@ -1,46 +0,0 @@
-__package__ = 'plugins_pkg.puppeteer'
-__label__ = 'puppeteer'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/puppeteer/puppeteer'
-__dependencies__ = ['npm']
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'puppeteer': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import PUPPETEER_CONFIG
-
- return {
- 'puppeteer': PUPPETEER_CONFIG
- }
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import PUPPETEER_BINARY
-
- return {
- 'puppeteer': PUPPETEER_BINARY,
- }
-
-@abx.hookimpl
-def get_BINPROVIDERS():
- from .binproviders import PUPPETEER_BINPROVIDER
-
- return {
- 'puppeteer': PUPPETEER_BINPROVIDER,
- }
diff --git a/archivebox/plugins_search/ripgrep/__init__.py b/archivebox/plugins_search/ripgrep/__init__.py
deleted file mode 100644
index ac1e417c..00000000
--- a/archivebox/plugins_search/ripgrep/__init__.py
+++ /dev/null
@@ -1,48 +0,0 @@
-__package__ = 'plugins_search.ripgrep'
-__label__ = 'ripgrep'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/BurntSushi/ripgrep'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'ripgrep': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import RIPGREP_CONFIG
-
- return {
- 'ripgrep': RIPGREP_CONFIG
- }
-
-
-@abx.hookimpl
-def get_BINARIES():
- from .binaries import RIPGREP_BINARY
-
- return {
- 'ripgrep': RIPGREP_BINARY
- }
-
-
-@abx.hookimpl
-def get_SEARCHBACKENDS():
- from .searchbackend import RIPGREP_SEARCH_BACKEND
-
- return {
- 'ripgrep': RIPGREP_SEARCH_BACKEND,
- }
diff --git a/archivebox/plugins_search/sqlitefts/__init__.py b/archivebox/plugins_search/sqlitefts/__init__.py
deleted file mode 100644
index 63fb1b12..00000000
--- a/archivebox/plugins_search/sqlitefts/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-__package__ = 'plugins_search.sqlitefts'
-__label__ = 'sqlitefts'
-__version__ = '2024.10.14'
-__author__ = 'ArchiveBox'
-__homepage__ = 'https://github.com/ArchiveBox/archivebox'
-__dependencies__ = []
-
-import abx
-
-
-@abx.hookimpl
-def get_PLUGIN():
- return {
- 'sqlitefts': {
- 'PACKAGE': __package__,
- 'LABEL': __label__,
- 'VERSION': __version__,
- 'AUTHOR': __author__,
- 'HOMEPAGE': __homepage__,
- 'DEPENDENCIES': __dependencies__,
- }
- }
-
-@abx.hookimpl
-def get_CONFIG():
- from .config import SQLITEFTS_CONFIG
-
- return {
- 'sqlitefts': SQLITEFTS_CONFIG
- }
-
-
-@abx.hookimpl
-def get_SEARCHBACKENDS():
- from .searchbackend import SQLITEFTS_SEARCH_BACKEND
-
- return {
- 'sqlitefts': SQLITEFTS_SEARCH_BACKEND,
- }
diff --git a/archivebox/queues/supervisor_util.py b/archivebox/queues/supervisor_util.py
index f181da08..0a4285f8 100644
--- a/archivebox/queues/supervisor_util.py
+++ b/archivebox/queues/supervisor_util.py
@@ -26,6 +26,23 @@ CONFIG_FILE_NAME = "supervisord.conf"
PID_FILE_NAME = "supervisord.pid"
WORKERS_DIR_NAME = "workers"
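+# supervisord [program:...] definitions for the two background huey workers,
+# registered by start_server_workers() below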
+SCHEDULER_WORKER = {
+ "name": "worker_scheduler",
+ "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --disable-health-check --flush-locks",
+ "autostart": "true",
+ "autorestart": "true",
+ "stdout_logfile": "logs/worker_scheduler.log",
+ "redirect_stderr": "true",
+}
+COMMAND_WORKER = {
+ "name": "worker_commands",
+ "command": "archivebox manage djangohuey --queue commands -w 4 -k thread --no-periodic --disable-health-check",
+ "autostart": "true",
+ "autorestart": "true",
+ "stdout_logfile": "logs/worker_commands.log",
+ "redirect_stderr": "true",
+}
+
@cache
def get_sock_file():
"""Get the path to the supervisord socket file, symlinking to a shorter path if needed due to unix path length limits"""
@@ -84,33 +101,35 @@ files = {WORKERS_DIR}/*.conf
"""
CONFIG_FILE.write_text(config_content)
- Path.mkdir(WORKERS_DIR, exist_ok=True)
+ Path.mkdir(WORKERS_DIR, exist_ok=True, parents=True)
+
(WORKERS_DIR / 'initial_startup.conf').write_text('') # hides error about "no files found to include" when supervisord starts
def create_worker_config(daemon):
+ """Create a supervisord worker config file for a given daemon"""
SOCK_FILE = get_sock_file()
WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME
- Path.mkdir(WORKERS_DIR, exist_ok=True)
+ Path.mkdir(WORKERS_DIR, exist_ok=True, parents=True)
name = daemon['name']
- configfile = WORKERS_DIR / f"{name}.conf"
+ worker_conf = WORKERS_DIR / f"{name}.conf"
- config_content = f"[program:{name}]\n"
+ worker_str = f"[program:{name}]\n"
for key, value in daemon.items():
if key == 'name':
continue
- config_content += f"{key}={value}\n"
- config_content += "\n"
+ worker_str += f"{key}={value}\n"
+ worker_str += "\n"
- configfile.write_text(config_content)
+ worker_conf.write_text(worker_str)
def get_existing_supervisord_process():
SOCK_FILE = get_sock_file()
try:
transport = SupervisorTransport(None, None, f"unix://{SOCK_FILE}")
- server = ServerProxy("http://localhost", transport=transport)
+ server = ServerProxy("http://localhost", transport=transport) # user:pass@localhost doesn't work for some reason with unix://.sock; can't seem to silence the CRIT no-auth warning
current_state = cast(Dict[str, int | str], server.supervisor.getState())
if current_state["statename"] == "RUNNING":
pid = server.supervisor.getPID()
@@ -127,6 +146,7 @@ def stop_existing_supervisord_process():
PID_FILE = SOCK_FILE.parent / PID_FILE_NAME
try:
+ # if pid file exists, load PID int
try:
pid = int(PID_FILE.read_text())
except (FileNotFoundError, ValueError):
@@ -136,15 +156,15 @@ def stop_existing_supervisord_process():
print(f"[🦸♂️] Stopping supervisord process (pid={pid})...")
proc = psutil.Process(pid)
proc.terminate()
- proc.wait()
- except (Exception, BrokenPipeError, IOError):
+ proc.wait(timeout=5)
+ except BaseException: # BrokenPipeError, IOError, and KeyboardInterrupt are all BaseException subclasses
pass
finally:
try:
# clear PID file and socket file
PID_FILE.unlink(missing_ok=True)
get_sock_file().unlink(missing_ok=True)
- except Exception:
+ except BaseException:
pass
def start_new_supervisord_process(daemonize=False):
@@ -278,47 +298,6 @@ def start_worker(supervisor, daemon, lazy=False):
raise Exception(f"Failed to start worker {daemon['name']}! Only found: {procs}")
-def watch_worker(supervisor, daemon_name, interval=5):
- """loop continuously and monitor worker's health"""
- while True:
- proc = get_worker(supervisor, daemon_name)
- if not proc:
- raise Exception("Worker dissapeared while running! " + daemon_name)
-
- if proc['statename'] == 'STOPPED':
- return proc
-
- if proc['statename'] == 'RUNNING':
- time.sleep(1)
- continue
-
- if proc['statename'] in ('STARTING', 'BACKOFF', 'FATAL', 'EXITED', 'STOPPING'):
- print(f'[🦸♂️] WARNING: Worker {daemon_name} {proc["statename"]} {proc["description"]}')
- time.sleep(interval)
- continue
-
-def tail_worker_logs(log_path: str):
- get_or_create_supervisord_process(daemonize=False)
-
- from rich.live import Live
- from rich.table import Table
-
- table = Table()
- table.add_column("TS")
- table.add_column("URL")
-
- try:
- with Live(table, refresh_per_second=1) as live: # update 4 times a second to feel fluid
- with open(log_path, 'r') as f:
- for line in follow(f):
- if '://' in line:
- live.console.print(f"Working on: {line.strip()}")
- # table.add_row("123124234", line.strip())
- except (KeyboardInterrupt, BrokenPipeError, IOError):
- STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
- except SystemExit:
- pass
-
def get_worker(supervisor, daemon_name):
try:
return supervisor.getProcessInfo(daemon_name)
@@ -350,28 +329,55 @@ def stop_worker(supervisor, daemon_name):
raise Exception(f"Failed to stop worker {daemon_name}!")
+def tail_worker_logs(log_path: str):
+ get_or_create_supervisord_process(daemonize=False)
+
+ from rich.live import Live
+ from rich.table import Table
+
+ table = Table()
+ table.add_column("TS")
+ table.add_column("URL")
+
+ try:
+ with Live(table, refresh_per_second=1) as live: # refresh once per second to keep the output feeling fluid
+ with open(log_path, 'r') as f:
+ for line in follow(f):
+ if '://' in line:
+ live.console.print(f"Working on: {line.strip()}")
+ # table.add_row("123124234", line.strip())
+ except (KeyboardInterrupt, BrokenPipeError, IOError):
+ STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...")
+ except SystemExit:
+ pass
+
+def watch_worker(supervisor, daemon_name, interval=5):
+ """loop continuously and monitor worker's health"""
+ while True:
+ proc = get_worker(supervisor, daemon_name)
+ if not proc:
+ raise Exception("Worker dissapeared while running! " + daemon_name)
+
+ if proc['statename'] == 'STOPPED':
+ return proc
+
+ if proc['statename'] == 'RUNNING':
+ time.sleep(1)
+ continue
+
+ if proc['statename'] in ('STARTING', 'BACKOFF', 'FATAL', 'EXITED', 'STOPPING'):
+ print(f'[🦸♂️] WARNING: Worker {daemon_name} {proc["statename"]} {proc["description"]}')
+ time.sleep(interval)
+ continue
+
def start_server_workers(host='0.0.0.0', port='8000', daemonize=False):
supervisor = get_or_create_supervisord_process(daemonize=daemonize)
bg_workers = [
- {
- "name": "worker_scheduler",
- "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --disable-health-check --flush-locks",
- "autostart": "true",
- "autorestart": "true",
- "stdout_logfile": "logs/worker_scheduler.log",
- "redirect_stderr": "true",
- },
- {
- "name": "worker_system_tasks",
- "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --no-periodic --disable-health-check",
- "autostart": "true",
- "autorestart": "true",
- "stdout_logfile": "logs/worker_system_tasks.log",
- "redirect_stderr": "true",
- },
+ SCHEDULER_WORKER,
+ COMMAND_WORKER,
]
fg_worker = {
"name": "worker_daphne",
diff --git a/archivebox/queues/tasks.py b/archivebox/queues/tasks.py
index acfeab0b..6f62a8c1 100644
--- a/archivebox/queues/tasks.py
+++ b/archivebox/queues/tasks.py
@@ -1,7 +1,7 @@
__package__ = 'archivebox.queues'
from functools import wraps
-from django.utils import timezone
+# from django.utils import timezone
from django_huey import db_task, task
@@ -10,7 +10,7 @@ from huey_monitor.tqdm import ProcessInfo
from .supervisor_util import get_or_create_supervisord_process
-# @db_task(queue="system_tasks", context=True, schedule=1)
+# @db_task(queue="commands", context=True, schedule=1)
# def scheduler_tick():
# print('SCHEDULER TICK', timezone.now().isoformat())
# # abx.archivebox.events.on_scheduler_runloop_start(timezone.now(), machine=Machine.objects.get_current_machine())
@@ -43,7 +43,7 @@ def db_task_with_parent(func):
return wrapper
-@db_task(queue="system_tasks", context=True)
+@db_task(queue="commands", context=True)
def bg_add(add_kwargs, task=None, parent_task_id=None):
get_or_create_supervisord_process(daemonize=False)
@@ -62,7 +62,7 @@ def bg_add(add_kwargs, task=None, parent_task_id=None):
return result
-@task(queue="system_tasks", context=True)
+@task(queue="commands", context=True)
def bg_archive_links(args, kwargs=None, task=None, parent_task_id=None):
get_or_create_supervisord_process(daemonize=False)
@@ -83,7 +83,7 @@ def bg_archive_links(args, kwargs=None, task=None, parent_task_id=None):
return result
-@task(queue="system_tasks", context=True)
+@task(queue="commands", context=True)
def bg_archive_link(args, kwargs=None,task=None, parent_task_id=None):
get_or_create_supervisord_process(daemonize=False)
@@ -104,7 +104,7 @@ def bg_archive_link(args, kwargs=None,task=None, parent_task_id=None):
return result
-@task(queue="system_tasks", context=True)
+@task(queue="commands", context=True)
def bg_archive_snapshot(snapshot, overwrite=False, methods=None, task=None, parent_task_id=None):
# get_or_create_supervisord_process(daemonize=False)
diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py
index 2e7d4f69..921c074f 100644
--- a/archivebox/search/__init__.py
+++ b/archivebox/search/__init__.py
@@ -6,8 +6,8 @@ from typing import List, Union
from django.db.models import QuerySet
from django.conf import settings
-import abx.archivebox.reads
-
+import abx
+import archivebox
from archivebox.index.schema import Link
from archivebox.misc.util import enforce_types
from archivebox.misc.logging import stderr
@@ -57,7 +57,7 @@ def get_indexable_content(results: QuerySet):
def import_backend():
- for backend in abx.archivebox.reads.get_SEARCHBACKENDS().values():
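+ # merge the per-plugin dicts returned by each get_SEARCHBACKENDS() hookimpl into one mapping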
+ for backend in abx.as_dict(archivebox.pm.hook.get_SEARCHBACKENDS()).values():
if backend.name == SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE:
return backend
raise Exception(f'Could not load {SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE} as search backend')
diff --git a/archivebox/seeds/models.py b/archivebox/seeds/models.py
index b0d83b2e..7fe49c83 100644
--- a/archivebox/seeds/models.py
+++ b/archivebox/seeds/models.py
@@ -1,19 +1,8 @@
__package__ = 'archivebox.seeds'
-from datetime import datetime
-
-from django_stubs_ext.db.models import TypedModelMeta
-
from django.db import models
-from django.db.models import Q
-from django.core.validators import MaxValueValidator, MinValueValidator
from django.conf import settings
-from django.utils import timezone
-from django.utils.functional import cached_property
-from django.urls import reverse_lazy
-
-from pathlib import Path
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
@@ -47,7 +36,10 @@ class Seed(ABIDModel, ModelWithHealthStats):
abid_rand_src = 'self.id'
abid_drift_allowed = True
- uri = models.URLField(max_length=255, blank=False, null=False, unique=True) # unique source location where URLs will be loaded from
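+ # declare id + abid explicitly per ABIDModel convention; id is populated from the ABID on save (hence default=None)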
+ id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
+ abid = ABIDField(prefix=abid_prefix)
+
+ uri = models.URLField(max_length=2000, blank=False, null=False) # unique source location where URLs will be loaded from
extractor = models.CharField(default='auto', max_length=32) # suggested extractor to use to load this URL source
tags_str = models.CharField(max_length=255, null=False, blank=True, default='') # tags to attach to any URLs that come from this source
@@ -64,4 +56,10 @@ class Seed(ABIDModel, ModelWithHealthStats):
# pocketapi://
# s3://
# etc..
- return self.uri.split('://')[0].lower()
+ return self.uri.split('://', 1)[0].lower()
+
+ class Meta:
+ verbose_name = 'Seed'
+ verbose_name_plural = 'Seeds'
+
+ unique_together = (('created_by', 'uri', 'extractor'),)
diff --git a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py
deleted file mode 100644
index a997acbb..00000000
--- a/archivebox/vendor/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import sys
-import importlib
-from pathlib import Path
-
-VENDOR_DIR = Path(__file__).parent
-
-VENDORED_LIBS = {
- # sys.path dir: library name
- #'python-atomicwrites': 'atomicwrites',
- #'django-taggit': 'taggit',
- 'pydantic-pkgr': 'pydantic_pkgr',
- 'pocket': 'pocket',
- #'base32-crockford': 'base32_crockford',
-}
-
-def load_vendored_libs():
- for lib_subdir, lib_name in VENDORED_LIBS.items():
- lib_dir = VENDOR_DIR / lib_subdir
- assert lib_dir.is_dir(), 'Expected vendor libary {lib_name} could not be found in {lib_dir}'
-
- try:
- lib = importlib.import_module(lib_name)
- # print(f"Successfully imported lib from environment {lib_name}: {inspect.getfile(lib)}")
- except ImportError:
- sys.path.append(str(lib_dir))
- try:
- lib = importlib.import_module(lib_name)
- # print(f"Successfully imported lib from vendored fallback {lib_name}: {inspect.getfile(lib)}")
- except ImportError as e:
- print(f"Failed to import lib from environment or vendored fallback {lib_name}: {e}", file=sys.stderr)
- sys.exit(1)
-
-
diff --git a/archivebox/vendor/pocket b/archivebox/vendor/pocket
deleted file mode 160000
index e7970b63..00000000
--- a/archivebox/vendor/pocket
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5
diff --git a/archivebox/vendor/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr
deleted file mode 160000
index a116eaef..00000000
--- a/archivebox/vendor/pydantic-pkgr
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit a116eaef7f090dc872b18e82b5a538313075ded6
diff --git a/archivebox/vendor/requirements.txt b/archivebox/vendor/requirements.txt
deleted file mode 100644
index 43be87c2..00000000
--- a/archivebox/vendor/requirements.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# this folder contains vendored versions of these packages
-
-#atomicwrites==1.4.0
-#pocket==0.3.7
-pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7
-#django-taggit==1.3.0
-#base32-crockford==0.3.0
-pydantic-pkgr>=0.4.7
diff --git a/click_test.py b/click_test.py
new file mode 100644
index 00000000..4e4a0e40
--- /dev/null
+++ b/click_test.py
@@ -0,0 +1,30 @@
+import sys
+import click
+from rich import print
+import archivebox # archivebox.pm is the ABX plugin manager used in extract() below
+from archivebox.config.django import setup_django
+
+setup_django()
+
+
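+# parse whitespace/newline-separated URLs or snapshot ids out of piped stdin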
+def parse_stdin_to_args(io=sys.stdin):
+ for line in io.read().split('\n'):
+ for url_or_id in line.split(' '):
+ if url_or_id.strip():
+ yield url_or_id.strip()
+
+
+# Gather data from stdin in case using a pipe
+if not sys.stdin.isatty():
+ sys.argv += parse_stdin_to_args(sys.stdin)
+
+
+@click.command()
+@click.argument("snapshot_ids_or_urls", type=str, nargs=-1)
+def extract(snapshot_ids_or_urls):
+ for url_or_snapshot_id in snapshot_ids_or_urls:
+ print('- EXTRACTING', url_or_snapshot_id, file=sys.stderr)
+ for result in archivebox.pm.hook.extract(url_or_snapshot_id):
+ print(result)
+
+if __name__ == "__main__":
+ extract()
diff --git a/pyproject.toml b/pyproject.toml
index f692da81..aceae950 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "archivebox"
-version = "0.8.5rc53"
+version = "0.8.6rc0"
requires-python = ">=3.10"
description = "Self-hosted internet archiving solution."
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
@@ -39,13 +39,14 @@ classifiers = [
dependencies = [
- # ... archivebox/vendor/* # see vendored libs here
+ # ... archivebox/pkgs/* # see vendored libs here
############# Django / Core Libraries #############
"setuptools>=74.1.0",
"django>=5.1.1,<6.0",
"django-ninja>=1.3.0",
"django-extensions>=3.2.3",
"mypy-extensions>=1.0.0",
+ "typing_extensions>=4.12.2",
"channels[daphne]>=4.1.0",
"django-signal-webhooks>=0.3.0",
"django-admin-data-views>=0.4.1",
@@ -60,7 +61,7 @@ dependencies = [
"pluggy>=1.5.0",
"requests>=2.32.3",
"dateparser>=1.2.0",
- "tzdata>=2024.2", # needed for dateparser {TZ: UTC} on some systems: https://github.com/ArchiveBox/ArchiveBox/issues/1553
+ "tzdata>=2024.2", # needed for dateparser {TZ: UTC} on some systems: https://github.com/ArchiveBox/ArchiveBox/issues/1553
"feedparser>=6.0.11",
"w3lib>=2.2.1",
"rich>=13.8.0",
@@ -69,20 +70,51 @@ dependencies = [
"typeid-python>=0.3.1",
"psutil>=6.0.0",
"supervisor>=4.2.5",
- "python-crontab>=3.2.0", # for: archivebox schedule
- "croniter>=3.0.3", # for: archivebox schedule
- "ipython>=8.27.0", # for: archivebox shell
- "py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid
+ "python-crontab>=3.2.0", # for: archivebox schedule
+ "croniter>=3.0.3", # for: archivebox schedule
+ "ipython>=8.27.0", # for: archivebox shell
+ "py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid
"python-benedict[io,parse]>=0.33.2",
"pydantic-settings>=2.5.2",
"atomicwrites==1.4.1",
"django-taggit==6.1.0",
"base32-crockford==0.3.0",
- # "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7",
+ "platformdirs>=4.3.6",
"pydantic-pkgr>=0.5.4",
- ############# Plugin Dependencies ################
+ "pocket>=0.3.6",
"sonic-client>=1.0.0",
- "yt-dlp>=2024.8.6", # for: media"
+ "yt-dlp>=2024.8.6", # for: media"
+ ############# Plugin Dependencies ################
+ "abx>=0.1.0",
+ "abx-spec-pydantic-pkgr>=0.1.0",
+ "abx-spec-config>=0.1.0",
+ "abx-spec-archivebox>=0.1.0",
+ "abx-spec-django>=0.1.0",
+ "abx-spec-extractor>=0.1.0",
+ "abx-spec-searchbackend>=0.1.0",
+ "abx-plugin-default-binproviders>=2024.10.24",
+ "abx-plugin-pip>=2024.10.24",
+ "abx-plugin-npm>=2024.10.24",
+ "abx-plugin-playwright>=2024.10.24",
+ "abx-plugin-puppeteer>=2024.10.28",
+ "abx-plugin-ripgrep-search>=2024.10.28",
+ "abx-plugin-sqlitefts-search>=2024.10.28",
+ "abx-plugin-sonic-search>=2024.10.28",
+ "abx-plugin-ldap-auth>=2024.10.28",
+ "abx-plugin-curl>=2024.10.27",
+ "abx-plugin-wget>=2024.10.28",
+ "abx-plugin-git>=2024.10.28",
+ "abx-plugin-chrome>=2024.10.28",
+ "abx-plugin-ytdlp>=2024.10.28",
+ "abx-plugin-title>=2024.10.27",
+ "abx-plugin-favicon>=2024.10.27",
+ # "abx-plugin-headers>=2024.10.27",
+ "abx-plugin-archivedotorg>=2024.10.28",
+ "abx-plugin-singlefile>=2024.10.28",
+ "abx-plugin-readability>=2024.10.28",
+ "abx-plugin-mercury>=2024.10.28",
+ "abx-plugin-htmltotext>=2024.10.28",
+ "python-statemachine>=2.3.6",
]
[project.optional-dependencies]
@@ -113,7 +145,7 @@ all = [
[tool.uv]
dev-dependencies = [
### BUILD
- "uv",
+ "uv>=0.4.26",
"pip>=24.2",
"setuptools>=75.1.0",
"wheel>=0.44.0",
@@ -121,14 +153,17 @@ dev-dependencies = [
#"homebrew-pypi-poet>=0.10.0", # for: generating archivebox.rb brewfile list of python packages
### DOCS
"recommonmark>=0.7.1",
- "sphinx",
+ "sphinx>=8.1.3",
"sphinx-rtd-theme>=2.0.0",
### DEBUGGING
- "archivebox[debug]",
+ "django-debug-toolbar>=4.4.6",
+ "requests-tracker>=0.3.3",
+ "djdt_flamegraph>=0.2.13",
+ "ipdb>=0.13.13",
"logfire[django]>=0.51.0",
"opentelemetry-instrumentation-django>=0.47b0",
"opentelemetry-instrumentation-sqlite3>=0.47b0",
- "viztracer", # usage: viztracer ../.venv/bin/archivebox manage check
+ "viztracer>=0.17.0", # usage: viztracer ../.venv/bin/archivebox manage check
# "snakeviz", # usage: python -m cProfile -o flamegraph.prof ../.venv/bin/archivebox manage check
### TESTING
"pytest>=8.3.3",
@@ -139,8 +174,47 @@ dev-dependencies = [
"mypy>=1.11.2",
]
+[tool.uv.sources]
+# pydantic-pkgr = { workspace = true }
+
+abx = { workspace = true }
+abx-spec-pydantic-pkgr = { workspace = true }
+abx-spec-config = { workspace = true }
+abx-spec-archivebox = { workspace = true }
+abx-spec-django = { workspace = true }
+abx-spec-extractor = { workspace = true }
+abx-spec-searchbackend = { workspace = true }
+
+abx-plugin-default-binproviders = { workspace = true }
+abx-plugin-pip = { workspace = true }
+abx-plugin-npm = { workspace = true }
+abx-plugin-playwright = { workspace = true }
+abx-plugin-puppeteer = { workspace = true }
+abx-plugin-ripgrep-search = { workspace = true }
+abx-plugin-sqlitefts-search = { workspace = true }
+abx-plugin-sonic-search = { workspace = true }
+abx-plugin-ldap-auth = { workspace = true }
+
+abx-plugin-curl = { workspace = true }
+abx-plugin-wget = { workspace = true }
+abx-plugin-git = { workspace = true }
+abx-plugin-chrome = { workspace = true }
+abx-plugin-ytdlp = { workspace = true }
+
+abx-plugin-title = { workspace = true }
+abx-plugin-favicon = { workspace = true }
+# abx-plugin-headers = { workspace = true }
+abx-plugin-archivedotorg = { workspace = true }
+
+abx-plugin-singlefile = { workspace = true }
+abx-plugin-readability = { workspace = true }
+abx-plugin-mercury = { workspace = true }
+abx-plugin-htmltotext = { workspace = true }
+
+
[tool.uv.workspace]
-members = ["packages/*"]
+members = ["archivebox/pkgs/*"]
+exclude = ["archivebox/pkgs/__pycache__"]
[build-system]
requires = ["pdm-backend"]
@@ -155,7 +229,7 @@ package-dir = {"archivebox" = "archivebox"}
line-length = 140
target-version = "py310"
src = ["archivebox"]
-exclude = ["*.pyi", "typings/", "migrations/", "vendor/"]
+exclude = ["*.pyi", "typings/", "migrations/"]
# https://docs.astral.sh/ruff/rules/
[tool.ruff.lint]
@@ -190,7 +264,6 @@ exclude = [
"**/node_modules",
"**/__pycache__",
"**/migrations",
- "archivebox/vendor",
]
stubPath = "./archivebox/typings"
venvPath = "."
diff --git a/requirements.txt b/requirements.txt
index f9a37b4b..cf5cbb48 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,137 @@
# This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml --all-extras -o requirements.txt
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx
+ # via
+ # archivebox (pyproject.toml)
+ # abx-plugin-archivedotorg
+ # abx-plugin-chrome
+ # abx-plugin-curl
+ # abx-plugin-default-binproviders
+ # abx-plugin-favicon
+ # abx-plugin-git
+ # abx-plugin-htmltotext
+ # abx-plugin-ldap-auth
+ # abx-plugin-mercury
+ # abx-plugin-npm
+ # abx-plugin-pip
+ # abx-plugin-playwright
+ # abx-plugin-puppeteer
+ # abx-plugin-readability
+ # abx-plugin-ripgrep-search
+ # abx-plugin-singlefile
+ # abx-plugin-sonic-search
+ # abx-plugin-sqlitefts-search
+ # abx-plugin-title
+ # abx-plugin-wget
+ # abx-plugin-ytdlp
+ # abx-spec-archivebox
+ # abx-spec-config
+ # abx-spec-django
+ # abx-spec-extractor
+ # abx-spec-pydantic-pkgr
+ # abx-spec-searchbackend
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-archivedotorg
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-chrome
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-curl
+ # via
+ # archivebox (pyproject.toml)
+ # abx-plugin-archivedotorg
+ # abx-plugin-favicon
+ # abx-plugin-title
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-default-binproviders
+ # via
+ # archivebox (pyproject.toml)
+ # abx-plugin-git
+ # abx-plugin-npm
+ # abx-plugin-pip
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-favicon
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-git
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-htmltotext
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ldap-auth
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-mercury
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-npm
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-pip
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-playwright
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-puppeteer
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-readability
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ripgrep-search
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-singlefile
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sonic-search
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sqlitefts-search
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-title
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-wget
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ytdlp
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-archivebox
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-config
+ # via
+ # archivebox (pyproject.toml)
+ # abx-plugin-archivedotorg
+ # abx-plugin-chrome
+ # abx-plugin-curl
+ # abx-plugin-favicon
+ # abx-plugin-git
+ # abx-plugin-htmltotext
+ # abx-plugin-ldap-auth
+ # abx-plugin-mercury
+ # abx-plugin-npm
+ # abx-plugin-pip
+ # abx-plugin-playwright
+ # abx-plugin-puppeteer
+ # abx-plugin-readability
+ # abx-plugin-ripgrep-search
+ # abx-plugin-singlefile
+ # abx-plugin-sonic-search
+ # abx-plugin-sqlitefts-search
+ # abx-plugin-title
+ # abx-plugin-wget
+ # abx-plugin-ytdlp
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-django
+ # via
+ # archivebox (pyproject.toml)
+ # abx-plugin-ldap-auth
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-extractor
+ # via archivebox (pyproject.toml)
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-pydantic-pkgr
+ # via
+ # archivebox (pyproject.toml)
+ # abx-plugin-chrome
+ # abx-plugin-curl
+ # abx-plugin-default-binproviders
+ # abx-plugin-git
+ # abx-plugin-npm
+ # abx-plugin-pip
+ # abx-plugin-playwright
+ # abx-plugin-puppeteer
+ # abx-plugin-singlefile
+ # abx-plugin-sonic-search
+ # abx-plugin-wget
+ # abx-plugin-ytdlp
+-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-searchbackend
+ # via
+ # archivebox (pyproject.toml)
+ # abx-plugin-ripgrep-search
+ # abx-plugin-sonic-search
+ # abx-plugin-sqlitefts-search
annotated-types==0.7.0
# via pydantic
anyio==4.6.2.post1
@@ -29,9 +161,9 @@ beautifulsoup4==4.12.3
# via python-benedict
brotli==1.1.0
# via yt-dlp
-bx-django-utils==79
+bx-django-utils==81
# via django-huey-monitor
-bx-py-utils==104
+bx-py-utils==105
# via
# bx-django-utils
# django-huey-monitor
@@ -49,7 +181,7 @@ charset-normalizer==3.4.0
# via requests
constantly==23.10.4
# via twisted
-croniter==3.0.3
+croniter==5.0.1
# via archivebox (pyproject.toml)
cryptography==43.0.3
# via
@@ -62,15 +194,23 @@ daphne==4.1.2
dateparser==1.2.0
# via archivebox (pyproject.toml)
decorator==5.1.1
- # via ipython
+ # via
+ # ipdb
+ # ipython
django==5.1.2
# via
# archivebox (pyproject.toml)
+ # abx
+ # abx-plugin-pip
+ # abx-spec-archivebox
+ # abx-spec-django
# bx-django-utils
# channels
# django-admin-data-views
# django-auth-ldap
+ # django-autotyping
# django-charid-field
+ # django-debug-toolbar
# django-extensions
# django-huey
# django-huey-monitor
@@ -81,12 +221,17 @@ django==5.1.2
# django-stubs
# django-stubs-ext
# django-taggit
-django-admin-data-views==0.4.1
+ # requests-tracker
+django-admin-data-views==0.4.2
# via archivebox (pyproject.toml)
django-auth-ldap==5.1.0
# via archivebox (pyproject.toml)
+django-autotyping==0.5.1
+ # via archivebox (pyproject.toml)
django-charid-field==0.4
# via archivebox (pyproject.toml)
+django-debug-toolbar==4.4.6
+ # via archivebox (pyproject.toml)
django-extensions==3.2.3
# via archivebox (pyproject.toml)
django-huey==1.2.1
@@ -101,25 +246,27 @@ django-object-actions==4.3.0
# via archivebox (pyproject.toml)
django-pydantic-field==0.3.10
# via archivebox (pyproject.toml)
-django-settings-holder==0.1.2
+django-settings-holder==0.2.2
# via
# django-admin-data-views
# django-signal-webhooks
-django-signal-webhooks==0.3.0
+django-signal-webhooks==0.3.1
# via archivebox (pyproject.toml)
-django-stubs==5.1.0
+django-stubs==5.1.1
# via archivebox (pyproject.toml)
-django-stubs-ext==5.1.0
+django-stubs-ext==5.1.1
# via django-stubs
django-taggit==6.1.0
# via archivebox (pyproject.toml)
-et-xmlfile==1.1.0
+djdt-flamegraph==0.2.13
+ # via archivebox (pyproject.toml)
+et-xmlfile==2.0.0
# via openpyxl
executing==2.1.0
# via stack-data
feedparser==6.0.11
# via archivebox (pyproject.toml)
-ftfy==6.3.0
+ftfy==6.3.1
# via python-benedict
h11==0.14.0
# via httpcore
@@ -144,10 +291,16 @@ idna==3.10
# twisted
incremental==24.7.2
# via twisted
-ipython==8.28.0
+ipdb==0.13.13
# via archivebox (pyproject.toml)
+ipython==8.29.0
+ # via
+ # archivebox (pyproject.toml)
+ # ipdb
jedi==0.19.1
# via ipython
+libcst==1.5.0
+ # via django-autotyping
mailchecker==6.0.11
# via python-benedict
markdown-it-py==3.0.0
@@ -166,11 +319,17 @@ parso==0.8.4
# via jedi
pexpect==4.9.0
# via ipython
-phonenumbers==8.13.47
+phonenumbers==8.13.48
# via python-benedict
platformdirs==4.3.6
- # via pydantic-pkgr
+ # via
+ # archivebox (pyproject.toml)
+ # pydantic-pkgr
pluggy==1.5.0
+ # via
+ # archivebox (pyproject.toml)
+ # abx
+pocket==0.3.6
# via archivebox (pyproject.toml)
prompt-toolkit==3.0.48
# via ipython
@@ -197,6 +356,10 @@ pycryptodomex==3.21.0
# via yt-dlp
pydantic==2.9.2
# via
+ # abx-plugin-playwright
+ # abx-spec-config
+ # abx-spec-extractor
+ # abx-spec-searchbackend
# django-ninja
# django-pydantic-field
# pydantic-pkgr
@@ -206,9 +369,21 @@ pydantic-core==2.23.4
# pydantic
# pydantic-pkgr
pydantic-pkgr==0.5.4
- # via archivebox (pyproject.toml)
-pydantic-settings==2.6.0
- # via archivebox (pyproject.toml)
+ # via
+ # archivebox (pyproject.toml)
+ # abx-plugin-default-binproviders
+ # abx-plugin-npm
+ # abx-plugin-pip
+ # abx-plugin-playwright
+ # abx-plugin-puppeteer
+ # abx-plugin-singlefile
+ # abx-plugin-sonic-search
+ # abx-plugin-ytdlp
+ # abx-spec-pydantic-pkgr
+pydantic-settings==2.6.1
+ # via
+ # archivebox (pyproject.toml)
+ # abx-spec-config
pygments==2.18.0
# via
# ipython
@@ -216,7 +391,11 @@ pygments==2.18.0
pyopenssl==24.2.1
# via twisted
python-benedict==0.34.0
- # via archivebox (pyproject.toml)
+ # via
+ # archivebox (pyproject.toml)
+ # abx-spec-config
+ # abx-spec-extractor
+ # abx-spec-searchbackend
python-crontab==3.2.0
# via archivebox (pyproject.toml)
python-dateutil==2.9.0.post0
@@ -242,23 +421,29 @@ pytz==2024.2
# croniter
# dateparser
pyyaml==6.0.2
- # via python-benedict
+ # via
+ # libcst
+ # python-benedict
regex==2024.9.11
# via dateparser
requests==2.32.3
# via
# archivebox (pyproject.toml)
+ # pocket
# python-benedict
# yt-dlp
-rich==13.9.2
+requests-tracker==0.3.3
+ # via archivebox (pyproject.toml)
+rich==13.9.4
# via
# archivebox (pyproject.toml)
+ # abx-spec-config
# rich-argparse
-rich-argparse==1.5.2
+rich-argparse==1.6.0
# via archivebox (pyproject.toml)
-service-identity==24.1.0
+service-identity==24.2.0
# via twisted
-setuptools==75.2.0
+setuptools==75.3.0
# via
# archivebox (pyproject.toml)
# autobahn
@@ -280,7 +465,10 @@ sonic-client==1.0.0
soupsieve==2.6
# via beautifulsoup4
sqlparse==0.5.1
- # via django
+ # via
+ # django
+ # django-debug-toolbar
+ # requests-tracker
stack-data==0.6.3
# via ipython
supervisor==4.2.5
@@ -293,7 +481,7 @@ traitlets==5.14.3
# via
# ipython
# matplotlib-inline
-twisted==24.7.0
+twisted==24.10.0
# via daphne
txaio==23.1.1
# via autobahn
@@ -303,6 +491,7 @@ types-pyyaml==6.0.12.20240917
# via django-stubs
typing-extensions==4.12.2
# via
+ # archivebox (pyproject.toml)
# django-pydantic-field
# django-stubs
# django-stubs-ext
@@ -310,6 +499,8 @@ typing-extensions==4.12.2
# pydantic-core
# pydantic-pkgr
# twisted
+tzdata==2024.2
+ # via archivebox (pyproject.toml)
tzlocal==5.2
# via dateparser
ulid-py==1.1.0
@@ -332,7 +523,7 @@ xlrd==2.0.1
# via python-benedict
xmltodict==0.14.2
# via python-benedict
-yt-dlp==2024.10.7
+yt-dlp==2024.10.22
# via archivebox (pyproject.toml)
-zope-interface==7.1.0
+zope-interface==7.1.1
# via twisted
diff --git a/uv.lock b/uv.lock
index 1436d2f1..761668b7 100644
--- a/uv.lock
+++ b/uv.lock
@@ -6,6 +6,565 @@ resolution-markers = [
"python_full_version >= '3.13'",
]
+[manifest]
+members = [
+ "abx",
+ "abx-plugin-archivedotorg",
+ "abx-plugin-chrome",
+ "abx-plugin-curl",
+ "abx-plugin-default-binproviders",
+ "abx-plugin-favicon",
+ "abx-plugin-git",
+ "abx-plugin-htmltotext",
+ "abx-plugin-ldap-auth",
+ "abx-plugin-mercury",
+ "abx-plugin-npm",
+ "abx-plugin-pip",
+ "abx-plugin-playwright",
+ "abx-plugin-pocket",
+ "abx-plugin-puppeteer",
+ "abx-plugin-readability",
+ "abx-plugin-readwise",
+ "abx-plugin-ripgrep-search",
+ "abx-plugin-singlefile",
+ "abx-plugin-sonic-search",
+ "abx-plugin-sqlitefts-search",
+ "abx-plugin-title",
+ "abx-plugin-wget",
+ "abx-plugin-ytdlp",
+ "abx-spec-archivebox",
+ "abx-spec-config",
+ "abx-spec-django",
+ "abx-spec-extractor",
+ "abx-spec-pydantic-pkgr",
+ "abx-spec-searchbackend",
+ "archivebox",
+]
+
+[[package]]
+name = "abx"
+version = "0.1.0"
+source = { editable = "archivebox/pkgs/abx" }
+dependencies = [
+ { name = "django" },
+ { name = "pluggy" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "django", specifier = ">=5.1.1,<6.0" },
+ { name = "pluggy", specifier = ">=1.5.0" },
+]
+
+[[package]]
+name = "abx-plugin-archivedotorg"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-archivedotorg" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-plugin-curl" },
+ { name = "abx-spec-config" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+]
+
+[[package]]
+name = "abx-plugin-chrome"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-chrome" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+]
+
+[[package]]
+name = "abx-plugin-curl"
+version = "2024.10.24"
+source = { editable = "archivebox/pkgs/abx-plugin-curl" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+]
+
+[[package]]
+name = "abx-plugin-default-binproviders"
+version = "2024.10.24"
+source = { editable = "archivebox/pkgs/abx-plugin-default-binproviders" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-plugin-favicon"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-favicon" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-plugin-curl" },
+ { name = "abx-spec-config" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+]
+
+[[package]]
+name = "abx-plugin-git"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-git" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-plugin-default-binproviders" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+]
+
+[[package]]
+name = "abx-plugin-htmltotext"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-htmltotext" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+]
+
+[[package]]
+name = "abx-plugin-ldap-auth"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-ldap-auth" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-django" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-django", editable = "archivebox/pkgs/abx-spec-django" },
+]
+
+[[package]]
+name = "abx-plugin-mercury"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-mercury" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+]
+
+[[package]]
+name = "abx-plugin-npm"
+version = "2024.10.24"
+source = { editable = "archivebox/pkgs/abx-plugin-npm" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-plugin-default-binproviders" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-plugin-pip"
+version = "2024.10.24"
+source = { editable = "archivebox/pkgs/abx-plugin-pip" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-plugin-default-binproviders" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "django" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "django", specifier = ">=5.0.0" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-plugin-playwright"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-playwright" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "pydantic" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "pydantic", specifier = ">=2.4.2" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-plugin-pocket"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-pocket" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "pocket" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "pocket", specifier = ">=0.3.6" },
+]
+
+[[package]]
+name = "abx-plugin-puppeteer"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-puppeteer" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-plugin-readability"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-readability" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+]
+
+[[package]]
+name = "abx-plugin-readwise"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-readwise" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+]
+
+[[package]]
+name = "abx-plugin-ripgrep-search"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-ripgrep-search" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-searchbackend" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" },
+]
+
+[[package]]
+name = "abx-plugin-singlefile"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-singlefile" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-plugin-sonic-search"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-sonic-search" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "abx-spec-searchbackend" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-plugin-sqlitefts-search"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-sqlitefts-search" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-searchbackend" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" },
+]
+
+[[package]]
+name = "abx-plugin-title"
+version = "2024.10.27"
+source = { editable = "archivebox/pkgs/abx-plugin-title" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-plugin-curl" },
+ { name = "abx-spec-config" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+]
+
+[[package]]
+name = "abx-plugin-wget"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-wget" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+]
+
+[[package]]
+name = "abx-plugin-ytdlp"
+version = "2024.10.28"
+source = { editable = "archivebox/pkgs/abx-plugin-ytdlp" }
+dependencies = [
+ { name = "abx" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-spec-archivebox"
+version = "0.1.0"
+source = { editable = "archivebox/pkgs/abx-spec-archivebox" }
+dependencies = [
+ { name = "abx" },
+ { name = "django" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "django", specifier = ">=5.1.1,<6.0" },
+]
+
+[[package]]
+name = "abx-spec-config"
+version = "0.1.0"
+source = { editable = "archivebox/pkgs/abx-spec-config" }
+dependencies = [
+ { name = "abx" },
+ { name = "pydantic" },
+ { name = "pydantic-settings" },
+ { name = "python-benedict" },
+ { name = "rich" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "pydantic", specifier = ">=2.9.2" },
+ { name = "pydantic-settings", specifier = ">=2.6.0" },
+ { name = "python-benedict", specifier = ">=0.34.0" },
+ { name = "rich", specifier = ">=13.9.3" },
+]
+
+[[package]]
+name = "abx-spec-django"
+version = "0.1.0"
+source = { editable = "archivebox/pkgs/abx-spec-django" }
+dependencies = [
+ { name = "abx" },
+ { name = "django" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "django", specifier = ">=5.1.1,<6.0" },
+]
+
+[[package]]
+name = "abx-spec-extractor"
+version = "0.1.0"
+source = { editable = "archivebox/pkgs/abx-spec-extractor" }
+dependencies = [
+ { name = "abx" },
+ { name = "pydantic" },
+ { name = "python-benedict" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "pydantic", specifier = ">=2.5.0" },
+ { name = "python-benedict", specifier = ">=0.26.0" },
+]
+
+[[package]]
+name = "abx-spec-pydantic-pkgr"
+version = "0.1.0"
+source = { editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }
+dependencies = [
+ { name = "abx" },
+ { name = "pydantic-pkgr" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "pydantic-pkgr", specifier = ">=0.5.4" },
+]
+
+[[package]]
+name = "abx-spec-searchbackend"
+version = "0.1.0"
+source = { editable = "archivebox/pkgs/abx-spec-searchbackend" }
+dependencies = [
+ { name = "abx" },
+ { name = "pydantic" },
+ { name = "python-benedict" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "pydantic", specifier = ">=2.5.0" },
+ { name = "python-benedict", specifier = ">=0.26.0" },
+]
+
[[package]]
name = "alabaster"
version = "1.0.0"
@@ -41,9 +600,37 @@ wheels = [
[[package]]
name = "archivebox"
-version = "0.8.5rc53"
+version = "0.8.6rc0"
source = { editable = "." }
dependencies = [
+ { name = "abx" },
+ { name = "abx-plugin-archivedotorg" },
+ { name = "abx-plugin-chrome" },
+ { name = "abx-plugin-curl" },
+ { name = "abx-plugin-default-binproviders" },
+ { name = "abx-plugin-favicon" },
+ { name = "abx-plugin-git" },
+ { name = "abx-plugin-htmltotext" },
+ { name = "abx-plugin-ldap-auth" },
+ { name = "abx-plugin-mercury" },
+ { name = "abx-plugin-npm" },
+ { name = "abx-plugin-pip" },
+ { name = "abx-plugin-playwright" },
+ { name = "abx-plugin-puppeteer" },
+ { name = "abx-plugin-readability" },
+ { name = "abx-plugin-ripgrep-search" },
+ { name = "abx-plugin-singlefile" },
+ { name = "abx-plugin-sonic-search" },
+ { name = "abx-plugin-sqlitefts-search" },
+ { name = "abx-plugin-title" },
+ { name = "abx-plugin-wget" },
+ { name = "abx-plugin-ytdlp" },
+ { name = "abx-spec-archivebox" },
+ { name = "abx-spec-config" },
+ { name = "abx-spec-django" },
+ { name = "abx-spec-extractor" },
+ { name = "abx-spec-pydantic-pkgr" },
+ { name = "abx-spec-searchbackend" },
{ name = "atomicwrites" },
{ name = "base32-crockford" },
{ name = "channels", extra = ["daphne"] },
@@ -65,13 +652,16 @@ dependencies = [
{ name = "feedparser" },
{ name = "ipython" },
{ name = "mypy-extensions" },
+ { name = "platformdirs" },
{ name = "pluggy" },
+ { name = "pocket" },
{ name = "psutil" },
{ name = "py-machineid" },
{ name = "pydantic-pkgr" },
{ name = "pydantic-settings" },
{ name = "python-benedict", extra = ["io", "parse"] },
{ name = "python-crontab" },
+ { name = "python-statemachine" },
{ name = "requests" },
{ name = "rich" },
{ name = "rich-argparse" },
@@ -79,6 +669,7 @@ dependencies = [
{ name = "sonic-client" },
{ name = "supervisor" },
{ name = "typeid-python" },
+ { name = "typing-extensions" },
{ name = "tzdata" },
{ name = "ulid-py" },
{ name = "w3lib" },
@@ -88,7 +679,19 @@ dependencies = [
[package.optional-dependencies]
all = [
{ name = "django-auth-ldap" },
+ { name = "django-autotyping" },
+ { name = "django-debug-toolbar" },
+ { name = "djdt-flamegraph" },
+ { name = "ipdb" },
{ name = "python-ldap" },
+ { name = "requests-tracker" },
+]
+debug = [
+ { name = "django-autotyping" },
+ { name = "django-debug-toolbar" },
+ { name = "djdt-flamegraph" },
+ { name = "ipdb" },
+ { name = "requests-tracker" },
]
ldap = [
{ name = "django-auth-ldap" },
@@ -99,11 +702,9 @@ ldap = [
dev = [
{ name = "bottle" },
{ name = "bumpver" },
- { name = "django-autotyping" },
{ name = "django-debug-toolbar" },
{ name = "djdt-flamegraph" },
{ name = "flake8" },
- { name = "homebrew-pypi-poet" },
{ name = "ipdb" },
{ name = "logfire", extra = ["django"] },
{ name = "mypy" },
@@ -124,7 +725,35 @@ dev = [
[package.metadata]
requires-dist = [
- { name = "archivebox", extras = ["sonic", "ldap"], marker = "extra == 'all'" },
+ { name = "abx", editable = "archivebox/pkgs/abx" },
+ { name = "abx-plugin-archivedotorg", editable = "archivebox/pkgs/abx-plugin-archivedotorg" },
+ { name = "abx-plugin-chrome", editable = "archivebox/pkgs/abx-plugin-chrome" },
+ { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" },
+ { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" },
+ { name = "abx-plugin-favicon", editable = "archivebox/pkgs/abx-plugin-favicon" },
+ { name = "abx-plugin-git", editable = "archivebox/pkgs/abx-plugin-git" },
+ { name = "abx-plugin-htmltotext", editable = "archivebox/pkgs/abx-plugin-htmltotext" },
+ { name = "abx-plugin-ldap-auth", editable = "archivebox/pkgs/abx-plugin-ldap-auth" },
+ { name = "abx-plugin-mercury", editable = "archivebox/pkgs/abx-plugin-mercury" },
+ { name = "abx-plugin-npm", editable = "archivebox/pkgs/abx-plugin-npm" },
+ { name = "abx-plugin-pip", editable = "archivebox/pkgs/abx-plugin-pip" },
+ { name = "abx-plugin-playwright", editable = "archivebox/pkgs/abx-plugin-playwright" },
+ { name = "abx-plugin-puppeteer", editable = "archivebox/pkgs/abx-plugin-puppeteer" },
+ { name = "abx-plugin-readability", editable = "archivebox/pkgs/abx-plugin-readability" },
+ { name = "abx-plugin-ripgrep-search", editable = "archivebox/pkgs/abx-plugin-ripgrep-search" },
+ { name = "abx-plugin-singlefile", editable = "archivebox/pkgs/abx-plugin-singlefile" },
+ { name = "abx-plugin-sonic-search", editable = "archivebox/pkgs/abx-plugin-sonic-search" },
+ { name = "abx-plugin-sqlitefts-search", editable = "archivebox/pkgs/abx-plugin-sqlitefts-search" },
+ { name = "abx-plugin-title", editable = "archivebox/pkgs/abx-plugin-title" },
+ { name = "abx-plugin-wget", editable = "archivebox/pkgs/abx-plugin-wget" },
+ { name = "abx-plugin-ytdlp", editable = "archivebox/pkgs/abx-plugin-ytdlp" },
+ { name = "abx-spec-archivebox", editable = "archivebox/pkgs/abx-spec-archivebox" },
+ { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+ { name = "abx-spec-django", editable = "archivebox/pkgs/abx-spec-django" },
+ { name = "abx-spec-extractor", editable = "archivebox/pkgs/abx-spec-extractor" },
+ { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+ { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" },
+ { name = "archivebox", extras = ["sonic", "ldap", "debug"], marker = "extra == 'all'" },
{ name = "atomicwrites", specifier = "==1.4.1" },
{ name = "base32-crockford", specifier = "==0.3.0" },
{ name = "channels", extras = ["daphne"], specifier = ">=4.1.0" },
@@ -133,7 +762,9 @@ requires-dist = [
{ name = "django", specifier = ">=5.1.1,<6.0" },
{ name = "django-admin-data-views", specifier = ">=0.4.1" },
{ name = "django-auth-ldap", marker = "extra == 'ldap'", specifier = ">=4.1.0" },
+ { name = "django-autotyping", marker = "extra == 'debug'", specifier = ">=0.5.1" },
{ name = "django-charid-field", specifier = ">=0.4" },
+ { name = "django-debug-toolbar", marker = "extra == 'debug'", specifier = ">=4.4.6" },
{ name = "django-extensions", specifier = ">=3.2.3" },
{ name = "django-huey", specifier = ">=1.2.1" },
{ name = "django-huey-monitor", specifier = ">=0.9.0" },
@@ -144,10 +775,14 @@ requires-dist = [
{ name = "django-signal-webhooks", specifier = ">=0.3.0" },
{ name = "django-stubs", specifier = ">=5.0.4" },
{ name = "django-taggit", specifier = "==6.1.0" },
+ { name = "djdt-flamegraph", marker = "extra == 'debug'", specifier = ">=0.2.13" },
{ name = "feedparser", specifier = ">=6.0.11" },
+ { name = "ipdb", marker = "extra == 'debug'", specifier = ">=0.13.13" },
{ name = "ipython", specifier = ">=8.27.0" },
{ name = "mypy-extensions", specifier = ">=1.0.0" },
+ { name = "platformdirs", specifier = ">=4.3.6" },
{ name = "pluggy", specifier = ">=1.5.0" },
+ { name = "pocket", specifier = ">=0.3.6" },
{ name = "psutil", specifier = ">=6.0.0" },
{ name = "py-machineid", specifier = ">=0.6.0" },
{ name = "pydantic-pkgr", specifier = ">=0.5.4" },
@@ -155,13 +790,16 @@ requires-dist = [
{ name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" },
{ name = "python-crontab", specifier = ">=3.2.0" },
{ name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" },
+ { name = "python-statemachine", specifier = ">=2.3.6" },
{ name = "requests", specifier = ">=2.32.3" },
+ { name = "requests-tracker", marker = "extra == 'debug'", specifier = ">=0.3.3" },
{ name = "rich", specifier = ">=13.8.0" },
{ name = "rich-argparse", specifier = ">=1.5.2" },
{ name = "setuptools", specifier = ">=74.1.0" },
{ name = "sonic-client", specifier = ">=1.0.0" },
{ name = "supervisor", specifier = ">=4.2.5" },
{ name = "typeid-python", specifier = ">=0.3.1" },
+ { name = "typing-extensions", specifier = ">=4.12.2" },
{ name = "tzdata", specifier = ">=2024.2" },
{ name = "ulid-py", specifier = ">=1.1.0" },
{ name = "w3lib", specifier = ">=2.2.1" },
@@ -172,11 +810,9 @@ requires-dist = [
dev = [
{ name = "bottle", specifier = ">=0.13.1" },
{ name = "bumpver", specifier = ">=2023.1129" },
- { name = "django-autotyping", specifier = ">=0.5.1" },
{ name = "django-debug-toolbar", specifier = ">=4.4.6" },
{ name = "djdt-flamegraph", specifier = ">=0.2.13" },
{ name = "flake8", specifier = ">=7.1.1" },
- { name = "homebrew-pypi-poet", specifier = ">=0.10.0" },
{ name = "ipdb", specifier = ">=0.13.13" },
{ name = "logfire", extras = ["django"], specifier = ">=0.51.0" },
{ name = "mypy", specifier = ">=1.11.2" },
@@ -188,10 +824,10 @@ dev = [
{ name = "requests-tracker", specifier = ">=0.3.3" },
{ name = "ruff", specifier = ">=0.6.6" },
{ name = "setuptools", specifier = ">=75.1.0" },
- { name = "sphinx" },
+ { name = "sphinx", specifier = ">=8.1.3" },
{ name = "sphinx-rtd-theme", specifier = ">=2.0.0" },
- { name = "uv" },
- { name = "viztracer" },
+ { name = "uv", specifier = ">=0.4.26" },
+ { name = "viztracer", specifier = ">=0.17.0" },
{ name = "wheel", specifier = ">=0.44.0" },
]
@@ -407,25 +1043,25 @@ wheels = [
[[package]]
name = "bx-django-utils"
-version = "79"
+version = "81"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "bx-py-utils" },
{ name = "django" },
{ name = "python-stdnum" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/62/8e/d56ffeb8f39c176b03439f614526b0b7de2d298bbf3250d6fdd40521cc51/bx_django_utils-79.tar.gz", hash = "sha256:cb66087d4e9396281acf5a4394b749cff3062b66082d5726f6a8a342fdd35d0e", size = 190245 }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/4a/a4087420852629abd835a17f7d41eca9efa93453c6dcaa29697f40195021/bx_django_utils-81.tar.gz", hash = "sha256:0896f53d737ddda3e98085803e9f469abc4b84561d4062ec13aa40b14e9453b8", size = 192245 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/21/a1/dc24b907e2671512826d3c6593f79e4f78f8fc85544fbbf54102bacc08c9/bx_django_utils-79-py3-none-any.whl", hash = "sha256:d50b10ace24b0b363574542faecf04a81029e2fec6d6e6525fe063ed06238e04", size = 199326 },
+ { url = "https://files.pythonhosted.org/packages/28/8e/692dce1f10303c6f4a03f5c2ae646d36b555c6190f17e11a2a469f9bdc48/bx_django_utils-81-py3-none-any.whl", hash = "sha256:b7ca9a801f0a160fd68c5744b7449552a3029484c373b8aaa2f41d0d50431b51", size = 199480 },
]
[[package]]
name = "bx-py-utils"
-version = "104"
+version = "105"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/25/9d/d60b1594c40f63b77752a4cdba5ddb746fc61653ba6ea6f79995198087a9/bx_py_utils-104.tar.gz", hash = "sha256:508cfc1d0fa6c22298f697c4efaa913337847d488d8a53eeccfae9ee106123f6", size = 190865 }
+sdist = { url = "https://files.pythonhosted.org/packages/f7/c3/4949fd3031a26eaf7378befacc5a2858d68a4e328b342e2ffc4c321c9a89/bx_py_utils-105.tar.gz", hash = "sha256:1bb7c1401147df35a95ca78c1de9f25d104aeda941a5cc89f9cfc2d1616ddbd7", size = 192317 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/2e/da/959a65959ae49ad949ed9e1375df12c8b61f4af041d644a81403daf4f915/bx_py_utils-104-py3-none-any.whl", hash = "sha256:c92ebc4fb122e3e3c228d984d0a1f5c3284c3da6aab1a1c753f7eb1f71bdab3a", size = 175501 },
+ { url = "https://files.pythonhosted.org/packages/6c/e5/da929891157b56f7a9bf825118926910e5e3629eb1cd3ec441d292e7501c/bx_py_utils-105-py3-none-any.whl", hash = "sha256:d441b0e413f8b19b03ab1784187ca2cf2ec5b68d64082790bdbca16a4612cb3e", size = 175660 },
]
[[package]]
@@ -622,15 +1258,15 @@ wheels = [
[[package]]
name = "croniter"
-version = "3.0.3"
+version = "5.0.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "python-dateutil" },
{ name = "pytz" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/44/7a/14b0b14ab0203e2c79493cf487829dc294d5c44bedc810ab2f4a97fc9ff4/croniter-3.0.3.tar.gz", hash = "sha256:34117ec1741f10a7bd0ec3ad7d8f0eb8fa457a2feb9be32e6a2250e158957668", size = 53088 }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/8c/0656200bfa5c1e90b26f4bb1cc8aecb4a7722f8386ee044bdc2d4efb589e/croniter-5.0.1.tar.gz", hash = "sha256:7d9b1ef25b10eece48fdf29d8ac52f9b6252abff983ac614ade4f3276294019e", size = 57084 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/93/6a/f2f68e0f9cf702b6d055ab53cab0d8c100f04e86228ca500a8ca9de94b58/croniter-3.0.3-py2.py3-none-any.whl", hash = "sha256:b3bd11f270dc54ccd1f2397b813436015a86d30ffc5a7a9438eec1ed916f2101", size = 22422 },
+ { url = "https://files.pythonhosted.org/packages/3c/68/34c3d74d2af6ea98ff8a0b50d149cff26e88a3f09817121d1186e9185e97/croniter-5.0.1-py2.py3-none-any.whl", hash = "sha256:eb28439742291f6c10b181df1a5ecf421208b1fc62ef44501daec1780a0b09e9", size = 24149 },
]
[[package]]
@@ -732,15 +1368,16 @@ wheels = [
[[package]]
name = "django-admin-data-views"
-version = "0.4.1"
+version = "0.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "django" },
{ name = "django-settings-holder" },
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/5a/24/6467910537747af96c6c447b221d0e1c36e64547368700f43aecbbfa3097/django_admin_data_views-0.4.1.tar.gz", hash = "sha256:fbdd2d5d0caf3b1cb1ffac57f7caff0e38f02dfc71dfa4e230c8c50f1741bb61", size = 12073 }
+sdist = { url = "https://files.pythonhosted.org/packages/44/4f/3092990fa7ab550f5ab5b14eb8be272c141a7a768c118fcf3bf5f2c1259c/django_admin_data_views-0.4.2.tar.gz", hash = "sha256:d89310eaeae4e441267a27ba51b7cfe70f91d41da96d1cda73c1e8c46ba52d84", size = 12469 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/2a/4b/087370e416b91dfce3a67bbc079fe202cdacbf6040e8fecf9bc96a66dbaf/django_admin_data_views-0.4.1-py3-none-any.whl", hash = "sha256:ed4988ce2f1c000bfa0ebef3b0126be1284399e03e23763eeb9d2c499745bf08", size = 15242 },
+ { url = "https://files.pythonhosted.org/packages/32/86/5ab784b6a487cf85c9df03f66dcc7ffa817d9fe603fd44c0ba11bf0da590/django_admin_data_views-0.4.2-py3-none-any.whl", hash = "sha256:e7ebfc822187b53ff20f63b975745d660153f9735ab7d2c607bc5f7b90ff7ec2", size = 15319 },
]
[[package]]
@@ -885,16 +1522,16 @@ wheels = [
[[package]]
name = "django-settings-holder"
-version = "0.1.2"
+version = "0.2.2"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/1e/36/0ea7d1e6e782c8c8ec7e8a8f0614526e732e3728dee7778d575f35267e3c/django_settings_holder-0.1.2.tar.gz", hash = "sha256:8ab0f2dabf5a1c79ec9e95e97a296808e0f2c48f6f9aa1da1b77b433ee1e2f9e", size = 6454 }
+sdist = { url = "https://files.pythonhosted.org/packages/76/a2/eca3105add8254158ebc67b605a81aceeefc69238e3eae87ed50c2e2c438/django_settings_holder-0.2.2.tar.gz", hash = "sha256:a894e1a0e2573ff72ed752b97f5c8b03cda2745b64d3baff81db4ebd1e505b03", size = 8606 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/10/96/03b2ed31f267eeaf7d339d1f2ecd95d6ab6cb32a7dca3c3338e90a124c9b/django_settings_holder-0.1.2-py3-none-any.whl", hash = "sha256:7a65f888fc1e8427a807be72d43d5f3f242163e0a0eaf33a393592e6fff3e102", size = 8197 },
+ { url = "https://files.pythonhosted.org/packages/4f/55/250e5b80c785e2ca36f7db3346df0ba38ed63930cf791b2ad926a26dd466/django_settings_holder-0.2.2-py3-none-any.whl", hash = "sha256:37f229d44686dd2dc6f82ff75213c90f633c5fea0492df9c5660c775fa5d6941", size = 8689 },
]
[[package]]
name = "django-signal-webhooks"
-version = "0.3.0"
+version = "0.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "asgiref" },
@@ -902,15 +1539,16 @@ dependencies = [
{ name = "django" },
{ name = "django-settings-holder" },
{ name = "httpx" },
+ { name = "typing-extensions", marker = "python_full_version < '3.11'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/0f/e2/1974349def31611eeb689651d50c744c1a0810d36dc3830a961ea950578e/django_signal_webhooks-0.3.0.tar.gz", hash = "sha256:3efff4305a8c0555a17ce8f4cbb1006014afd7314862647db5724e06eec4493e", size = 16566 }
+sdist = { url = "https://files.pythonhosted.org/packages/41/15/865e72e1da78bc6c6865ff16b0dffb11db62999fc91bed8c3c1668eac4c1/django_signal_webhooks-0.3.1.tar.gz", hash = "sha256:23dc439be2fdea24b746726495eb1a7a59440809056482eebceb153d050a3f5b", size = 17806 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b0/75/63944fa0d6a854ce59158f5a02e4afc4d64dab5a1ddb3f25efe8747fdf10/django_signal_webhooks-0.3.0-py3-none-any.whl", hash = "sha256:64be32ff06c1b74fe80176395258cfb51f1757fed28f026285f38a44d559c00f", size = 22571 },
+ { url = "https://files.pythonhosted.org/packages/eb/7a/0f193eb3351af74de8c3d0fa89f72005caf63ad9456e281e5cd9b2be1a10/django_signal_webhooks-0.3.1-py3-none-any.whl", hash = "sha256:863beb94f6536a09b04d516df6103037748891f5f4555df36796fb54c8649854", size = 22940 },
]
[[package]]
name = "django-stubs"
-version = "5.1.0"
+version = "5.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "asgiref" },
@@ -920,22 +1558,22 @@ dependencies = [
{ name = "types-pyyaml" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/47/35/fa62c35c48e77bc4dabfe56d16786a2e9965ff89c4c55ab909c2d9f00ce8/django_stubs-5.1.0.tar.gz", hash = "sha256:86128c228b65e6c9a85e5dc56eb1c6f41125917dae0e21e6cfecdf1b27e630c5", size = 265839 }
+sdist = { url = "https://files.pythonhosted.org/packages/bf/60/1ae90eb6e2e107bc64a3de9de78a5add7f3b85e491113504eed38d6d2c63/django_stubs-5.1.1.tar.gz", hash = "sha256:126d354bbdff4906c4e93e6361197f6fbfb6231c3df6def85a291dae6f9f577b", size = 265624 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/1c/d8/4561cf32a652f12d1f6edf27ac1ed6194540b44592cc85ead62a1f6fdff6/django_stubs-5.1.0-py3-none-any.whl", hash = "sha256:b98d49a80aa4adf1433a97407102d068de26c739c405431d93faad96dd282c40", size = 470607 },
+ { url = "https://files.pythonhosted.org/packages/98/c8/3081d5f994351248fcd60f9aab10cb2020bdd7df0f14e80854373e15d7d4/django_stubs-5.1.1-py3-none-any.whl", hash = "sha256:c4dc64260bd72e6d32b9e536e8dd0d9247922f0271f82d1d5132a18f24b388ac", size = 470790 },
]
[[package]]
name = "django-stubs-ext"
-version = "5.1.0"
+version = "5.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "django" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/60/a5/dfb120bf3ce1f0da109481605f704ffe72533f056f42e8cffd5a486504a7/django_stubs_ext-5.1.0.tar.gz", hash = "sha256:ed7d51c0b731651879fc75f331fb0806d98b67bfab464e96e2724db6b46ef926", size = 9491 }
+sdist = { url = "https://files.pythonhosted.org/packages/ca/62/a7129909d3c94eac957c02eeb05ac57cbca81db4f3f6270a8503697f376a/django_stubs_ext-5.1.1.tar.gz", hash = "sha256:db7364e4f50ae7e5360993dbd58a3a57ea4b2e7e5bab0fbd525ccdb3e7975d1c", size = 9455 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/eb/03/20a5a19d1b8d26eddd8420261304ee9e6accd802f5332e360daaa2202afb/django_stubs_ext-5.1.0-py3-none-any.whl", hash = "sha256:a455fc222c90b30b29ad8c53319559f5b54a99b4197205ddbb385aede03b395d", size = 8966 },
+ { url = "https://files.pythonhosted.org/packages/6a/ed/f79ae5ad993bdf900d61892d2a9fc0145441a507a7579890fb8e21e4a7bc/django_stubs_ext-5.1.1-py3-none-any.whl", hash = "sha256:3907f99e178c93323e2ce908aef8352adb8c047605161f8d9e5e7b4efb5a6a9c", size = 8965 },
]
[[package]]
@@ -970,11 +1608,11 @@ wheels = [
[[package]]
name = "et-xmlfile"
-version = "1.1.0"
+version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/3d/5d/0413a31d184a20c763ad741cc7852a659bf15094c24840c5bdd1754765cd/et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c", size = 3218 }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/96/c2/3dd434b0108730014f1b96fd286040dc3bcb70066346f7e01ec2ac95865f/et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada", size = 4688 },
+ { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
]
[[package]]
@@ -1023,14 +1661,14 @@ wheels = [
[[package]]
name = "ftfy"
-version = "6.3.0"
+version = "6.3.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "wcwidth" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/85/c3/63753eca4c5257ce0561cb5f8e9cd0d45d97848c73c56e33a0a764319e5b/ftfy-6.3.0.tar.gz", hash = "sha256:1c7d6418e72b25a7760feb150acf574b86924dbb2e95b32c0b3abbd1ba3d7ad6", size = 362118 }
+sdist = { url = "https://files.pythonhosted.org/packages/a5/d3/8650919bc3c7c6e90ee3fa7fd618bf373cbbe55dff043bd67353dbb20cd8/ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec", size = 308927 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/76/0f/d8a8152e720cbcad890e56ee98639ff489f1992869b4cf304c3fa24d4bcc/ftfy-6.3.0-py3-none-any.whl", hash = "sha256:17aca296801f44142e3ff2c16f93fbf6a87609ebb3704a9a41dd5d4903396caf", size = 44778 },
+ { url = "https://files.pythonhosted.org/packages/ab/6e/81d47999aebc1b155f81eca4477a616a70f238a2549848c38983f3c22a82/ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083", size = 44821 },
]
[[package]]
@@ -1054,19 +1692,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 },
]
-[[package]]
-name = "homebrew-pypi-poet"
-version = "0.10.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "jinja2" },
- { name = "setuptools" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f7/d9/4b525af3be6ac0a0a962e101b7771db6511d9e96369ded2765406233f9ff/homebrew-pypi-poet-0.10.0.tar.gz", hash = "sha256:e09e997e35a98f66445f9a39ccb33d6d93c5cd090302a59f231707eac0bf378e", size = 5953 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/3b/85/998232eae0b5c6798c7140ef37d2c1be02ea06cd38dd80169b3abd63b600/homebrew_pypi_poet-0.10.0-py2.py3-none-any.whl", hash = "sha256:65824f97aea0e713c4ac18aa2ef4477aca69426554eac842eeaaddf97df3fc47", size = 7813 },
-]
-
[[package]]
name = "httpcore"
version = "1.0.6"
@@ -1182,7 +1807,7 @@ wheels = [
[[package]]
name = "ipython"
-version = "8.28.0"
+version = "8.29.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
@@ -1197,9 +1822,9 @@ dependencies = [
{ name = "traitlets" },
{ name = "typing-extensions", marker = "python_full_version < '3.12'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/f7/21/48db7d9dd622b9692575004c7c98f85f5629428f58596c59606d36c51b58/ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a", size = 5495762 }
+sdist = { url = "https://files.pythonhosted.org/packages/85/e0/a3f36dde97e12121106807d80485423ae4c5b27ce60d40d4ab0bab18a9db/ipython-8.29.0.tar.gz", hash = "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb", size = 5497513 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/f4/3a/5d8680279ada9571de8469220069d27024ee47624af534e537c9ff49a450/ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35", size = 819456 },
+ { url = "https://files.pythonhosted.org/packages/c5/a5/c15ed187f1b3fac445bb42a2dedd8dec1eee1718b35129242049a13a962f/ipython-8.29.0-py3-none-any.whl", hash = "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8", size = 819911 },
]
[[package]]
@@ -1272,7 +1897,7 @@ wheels = [
[[package]]
name = "logfire"
-version = "1.2.0"
+version = "2.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "executing" },
@@ -1284,9 +1909,9 @@ dependencies = [
{ name = "tomli", marker = "python_full_version < '3.11'" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/78/85/ce3e4ecc59a0126eaa9802f7d928d6efa837e63619dfec37654fb2d1f1c1/logfire-1.2.0.tar.gz", hash = "sha256:71866c4ce2f604b307ff0cc1a9b1254ea68b3c46f42bffd6ac36fc4db5abb62b", size = 240418 }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/b8/b4f3a741076a9bdce82ed25218a8167d74c9834588710babc03cb587773a/logfire-2.1.1.tar.gz", hash = "sha256:fd0b9a8b3334cd8c7efb52c04297c2360380818a021e8024ca37bae5f32b78aa", size = 244832 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/7d/7f/37d9c3cbed1ef23b467c0c0039f35524595f8fd79f3acb54e647a0ccd590/logfire-1.2.0-py3-none-any.whl", hash = "sha256:edb2b441e418cf31877bd97e24b3755f873bb423f834cca66f315b25bde61ebd", size = 164724 },
+ { url = "https://files.pythonhosted.org/packages/7e/93/905aef6a938fdd8633cf4937a35ae2438e2830788e8465588e1063ab79b5/logfire-2.1.1-py3-none-any.whl", hash = "sha256:5ead7b0f3edf6cab9bbe9a02e0f6a4c5f3f693411928b32b727ecb3d2b709814", size = 167207 },
]
[package.optional-dependencies]
@@ -1420,36 +2045,36 @@ wheels = [
[[package]]
name = "mypy"
-version = "1.12.1"
+version = "1.13.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mypy-extensions" },
{ name = "tomli", marker = "python_full_version < '3.11'" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/17/03/744330105a74dc004578f47ec27e1bf66b1dd5664ea444d18423e41343bd/mypy-1.12.1.tar.gz", hash = "sha256:f5b3936f7a6d0e8280c9bdef94c7ce4847f5cdfc258fbb2c29a8c1711e8bb96d", size = 3150767 }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/21/7e9e523537991d145ab8a0a2fd98548d67646dc2aaaf6091c31ad883e7c1/mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e", size = 3152532 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/16/90/3a83d3bcff2eb85151723f116336bd545995b5260a49d3e0d95213fcc2d7/mypy-1.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3d7d4371829184e22fda4015278fbfdef0327a4b955a483012bd2d423a788801", size = 11017908 },
- { url = "https://files.pythonhosted.org/packages/e4/5c/d6b32ddde2460fc63168ca0f7bf44f38474353547f7c0304a30023c40aa0/mypy-1.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f59f1dfbf497d473201356966e353ef09d4daec48caeacc0254db8ef633a28a5", size = 10184164 },
- { url = "https://files.pythonhosted.org/packages/42/5e/680aa37c938e6db23bd7e6dd4d38d7e609998491721e453b32ec10d31e7f/mypy-1.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b947097fae68004b8328c55161ac9db7d3566abfef72d9d41b47a021c2fba6b1", size = 12587852 },
- { url = "https://files.pythonhosted.org/packages/9e/0f/9cafea1c3aaf852cfa1d4a387f33923b6d9714b5c16eb0469da67c5c31e4/mypy-1.12.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:96af62050971c5241afb4701c15189ea9507db89ad07794a4ee7b4e092dc0627", size = 13106489 },
- { url = "https://files.pythonhosted.org/packages/ea/c3/7f56d5d87a81e665de8dfa424120ab3a6954ae5854946cec0a46f78f6168/mypy-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:d90da248f4c2dba6c44ddcfea94bb361e491962f05f41990ff24dbd09969ce20", size = 9634753 },
- { url = "https://files.pythonhosted.org/packages/18/0a/70de7c97a86cb85535077ab5cef1cbc4e2812fd2e9cc21d78eb561a6b80f/mypy-1.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1230048fec1380faf240be6385e709c8570604d2d27ec6ca7e573e3bc09c3735", size = 10940998 },
- { url = "https://files.pythonhosted.org/packages/c0/97/9ed6d4834d7549936ab88533b302184fb568a0940c4000d2aaee6dc07112/mypy-1.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02dcfe270c6ea13338210908f8cadc8d31af0f04cee8ca996438fe6a97b4ec66", size = 10108523 },
- { url = "https://files.pythonhosted.org/packages/48/41/1686f37d09c915dfc5b683e20cc99dabac199900b5ca6d22747b99ddcb50/mypy-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a437c9102a6a252d9e3a63edc191a3aed5f2fcb786d614722ee3f4472e33f6", size = 12505553 },
- { url = "https://files.pythonhosted.org/packages/8d/2b/2dbcaa7e97b23f27ced77493256ee878f4a140ac750e198630ff1b9b60c6/mypy-1.12.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:186e0c8346efc027ee1f9acf5ca734425fc4f7dc2b60144f0fbe27cc19dc7931", size = 12988634 },
- { url = "https://files.pythonhosted.org/packages/54/55/710d082e91a2ccaea21214229b11f9215a9d22446f949491b5457655e82b/mypy-1.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:673ba1140a478b50e6d265c03391702fa11a5c5aff3f54d69a62a48da32cb811", size = 9630747 },
- { url = "https://files.pythonhosted.org/packages/8a/74/b9e0e4f06e951e277058f878302faa154d282ca11274c59fe08353f52949/mypy-1.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9fb83a7be97c498176fb7486cafbb81decccaef1ac339d837c377b0ce3743a7f", size = 11079902 },
- { url = "https://files.pythonhosted.org/packages/9f/62/fcad290769db3eb0de265094cef5c94d6075c70bc1e42b67eee4ca192dcc/mypy-1.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:389e307e333879c571029d5b93932cf838b811d3f5395ed1ad05086b52148fb0", size = 10072373 },
- { url = "https://files.pythonhosted.org/packages/cb/27/9ac78349c2952e4446288ec1174675ab9e0160ed18c2cb1154fa456c54e8/mypy-1.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:94b2048a95a21f7a9ebc9fbd075a4fcd310410d078aa0228dbbad7f71335e042", size = 12589779 },
- { url = "https://files.pythonhosted.org/packages/7c/4a/58cebd122cf1cba95680ac51303fbeb508392413ca64e3e711aa7d4877aa/mypy-1.12.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ee5932370ccf7ebf83f79d1c157a5929d7ea36313027b0d70a488493dc1b179", size = 13044459 },
- { url = "https://files.pythonhosted.org/packages/5b/c7/672935e2a3f9bcc07b1b870395a653f665657bef3cdaa504ad99f56eadf0/mypy-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:19bf51f87a295e7ab2894f1d8167622b063492d754e69c3c2fed6563268cb42a", size = 9731919 },
- { url = "https://files.pythonhosted.org/packages/bb/b0/092be5094840a401940c95224f63bb2a8f09bce9251ac1df180ec523830c/mypy-1.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d34167d43613ffb1d6c6cdc0cc043bb106cac0aa5d6a4171f77ab92a3c758bcc", size = 11068611 },
- { url = "https://files.pythonhosted.org/packages/9a/86/f20f53b8f062876c39602243d7a59b5cabd6b24315d8de511d607fa4de6a/mypy-1.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:427878aa54f2e2c5d8db31fa9010c599ed9f994b3b49e64ae9cd9990c40bd635", size = 10068036 },
- { url = "https://files.pythonhosted.org/packages/84/c7/1dbd6575785522da1d4c1ac2c419505fcf23bee74811880cac447a4a77ab/mypy-1.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5fcde63ea2c9f69d6be859a1e6dd35955e87fa81de95bc240143cf00de1f7f81", size = 12585671 },
- { url = "https://files.pythonhosted.org/packages/46/8a/f6ae18b446eb2bccce54c4bd94065bcfe417d6c67021dcc032bf1e720aff/mypy-1.12.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d54d840f6c052929f4a3d2aab2066af0f45a020b085fe0e40d4583db52aab4e4", size = 13036083 },
- { url = "https://files.pythonhosted.org/packages/59/e6/fc65fde3dc7156fce8d49ba21c7b1f5d866ad50467bf196ca94a7f6d2c9e/mypy-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:20db6eb1ca3d1de8ece00033b12f793f1ea9da767334b7e8c626a4872090cf02", size = 9735467 },
- { url = "https://files.pythonhosted.org/packages/84/6b/1db9de4e0764778251fb2d64cb7455cf6db75dc99c9f72c8b7e74b6a8a17/mypy-1.12.1-py3-none-any.whl", hash = "sha256:ce561a09e3bb9863ab77edf29ae3a50e65685ad74bba1431278185b7e5d5486e", size = 2646060 },
+ { url = "https://files.pythonhosted.org/packages/5e/8c/206de95a27722b5b5a8c85ba3100467bd86299d92a4f71c6b9aa448bfa2f/mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a", size = 11020731 },
+ { url = "https://files.pythonhosted.org/packages/ab/bb/b31695a29eea76b1569fd28b4ab141a1adc9842edde080d1e8e1776862c7/mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80", size = 10184276 },
+ { url = "https://files.pythonhosted.org/packages/a5/2d/4a23849729bb27934a0e079c9c1aad912167d875c7b070382a408d459651/mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7", size = 12587706 },
+ { url = "https://files.pythonhosted.org/packages/5c/c3/d318e38ada50255e22e23353a469c791379825240e71b0ad03e76ca07ae6/mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f", size = 13105586 },
+ { url = "https://files.pythonhosted.org/packages/4a/25/3918bc64952370c3dbdbd8c82c363804678127815febd2925b7273d9482c/mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372", size = 9632318 },
+ { url = "https://files.pythonhosted.org/packages/d0/19/de0822609e5b93d02579075248c7aa6ceaddcea92f00bf4ea8e4c22e3598/mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d", size = 10939027 },
+ { url = "https://files.pythonhosted.org/packages/c8/71/6950fcc6ca84179137e4cbf7cf41e6b68b4a339a1f5d3e954f8c34e02d66/mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d", size = 10108699 },
+ { url = "https://files.pythonhosted.org/packages/26/50/29d3e7dd166e74dc13d46050b23f7d6d7533acf48f5217663a3719db024e/mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b", size = 12506263 },
+ { url = "https://files.pythonhosted.org/packages/3f/1d/676e76f07f7d5ddcd4227af3938a9c9640f293b7d8a44dd4ff41d4db25c1/mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73", size = 12984688 },
+ { url = "https://files.pythonhosted.org/packages/9c/03/5a85a30ae5407b1d28fab51bd3e2103e52ad0918d1e68f02a7778669a307/mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca", size = 9626811 },
+ { url = "https://files.pythonhosted.org/packages/fb/31/c526a7bd2e5c710ae47717c7a5f53f616db6d9097caf48ad650581e81748/mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5", size = 11077900 },
+ { url = "https://files.pythonhosted.org/packages/83/67/b7419c6b503679d10bd26fc67529bc6a1f7a5f220bbb9f292dc10d33352f/mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e", size = 10074818 },
+ { url = "https://files.pythonhosted.org/packages/ba/07/37d67048786ae84e6612575e173d713c9a05d0ae495dde1e68d972207d98/mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2", size = 12589275 },
+ { url = "https://files.pythonhosted.org/packages/1f/17/b1018c6bb3e9f1ce3956722b3bf91bff86c1cefccca71cec05eae49d6d41/mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0", size = 13037783 },
+ { url = "https://files.pythonhosted.org/packages/cb/32/cd540755579e54a88099aee0287086d996f5a24281a673f78a0e14dba150/mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2", size = 9726197 },
+ { url = "https://files.pythonhosted.org/packages/11/bb/ab4cfdc562cad80418f077d8be9b4491ee4fb257440da951b85cbb0a639e/mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7", size = 11069721 },
+ { url = "https://files.pythonhosted.org/packages/59/3b/a393b1607cb749ea2c621def5ba8c58308ff05e30d9dbdc7c15028bca111/mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62", size = 10063996 },
+ { url = "https://files.pythonhosted.org/packages/d1/1f/6b76be289a5a521bb1caedc1f08e76ff17ab59061007f201a8a18cc514d1/mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8", size = 12584043 },
+ { url = "https://files.pythonhosted.org/packages/a6/83/5a85c9a5976c6f96e3a5a7591aa28b4a6ca3a07e9e5ba0cec090c8b596d6/mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7", size = 13036996 },
+ { url = "https://files.pythonhosted.org/packages/b4/59/c39a6f752f1f893fccbcf1bdd2aca67c79c842402b5283563d006a67cf76/mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc", size = 9737709 },
+ { url = "https://files.pythonhosted.org/packages/3b/86/72ce7f57431d87a7ff17d442f521146a6585019eb8f4f31b7c02801f78ad/mypy-1.13.0-py3-none-any.whl", hash = "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a", size = 2647043 },
]
[[package]]
@@ -1679,20 +2304,20 @@ wheels = [
[[package]]
name = "phonenumbers"
-version = "8.13.47"
+version = "8.13.48"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ae/0c/8f315d5e6ddea2e45ae13ada6936df6240858929881daf20cb3133fdb729/phonenumbers-8.13.47.tar.gz", hash = "sha256:53c5e7c6d431cafe4efdd44956078404ae9bc8b0eacc47be3105d3ccc88aaffa", size = 2297081 }
+sdist = { url = "https://files.pythonhosted.org/packages/61/59/d01506a791481d26a640acb0a1124e3f0a816b0711e563962d7d55184890/phonenumbers-8.13.48.tar.gz", hash = "sha256:62d8df9b0f3c3c41571c6b396f044ddd999d61631534001b8be7fdf7ba1b18f3", size = 2297098 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b6/0b/5cde445764ac72460748107e999b026b7245e3fcc5fd5551cc5aff45e469/phonenumbers-8.13.47-py2.py3-none-any.whl", hash = "sha256:5d3c0142ef7055ca5551884352e3b6b93bfe002a0bc95b8eaba39b0e2184541b", size = 2582530 },
+ { url = "https://files.pythonhosted.org/packages/98/f4/a9340f98335ae6fab1ad4b56b6a04f390de65bea371c71b0cdf67e4c08d0/phonenumbers-8.13.48-py2.py3-none-any.whl", hash = "sha256:5c51939acefa390eb74119750afb10a85d3c628dc83fd62c52d6f532fcf5d205", size = 2582542 },
]
[[package]]
name = "pip"
-version = "24.2"
+version = "24.3.1"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/4d/87/fb90046e096a03aeab235e139436b3fe804cdd447ed2093b0d70eba3f7f8/pip-24.2.tar.gz", hash = "sha256:5b5e490b5e9cb275c879595064adce9ebd31b854e3e803740b72f9ccf34a45b8", size = 1922041 }
+sdist = { url = "https://files.pythonhosted.org/packages/f4/b1/b422acd212ad7eedddaf7981eee6e5de085154ff726459cf2da7c5a184c1/pip-24.3.1.tar.gz", hash = "sha256:ebcb60557f2aefabc2e0f918751cd24ea0d56d8ec5445fe1807f1d2109660b99", size = 1931073 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/d4/55/90db48d85f7689ec6f81c0db0622d704306c5284850383c090e6c7195a5c/pip-24.2-py3-none-any.whl", hash = "sha256:2cd581cf58ab7fcfca4ce8efa6dcacd0de5bf8d0a3eb9ec927e07405f4d9e2a2", size = 1815170 },
+ { url = "https://files.pythonhosted.org/packages/ef/7d/500c9ad20238fcfcb4cb9243eede163594d7020ce87bd9610c9e02771876/pip-24.3.1-py3-none-any.whl", hash = "sha256:3790624780082365f47549d032f3770eeb2b1e8bd1f7b2e02dace1afa361b4ed", size = 1822182 },
]
[[package]]
@@ -1713,6 +2338,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
]
+[[package]]
+name = "pocket"
+version = "0.3.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/b6/cd79a0e237e733e2f8a196f4e9f4d30d99c769b809c5fbbea9e34400655d/pocket-0.3.6.tar.gz", hash = "sha256:907bf16a19fae9c2080f799d979de4c8daa36d6d28e86ceb9fc17d6f0bdb89b9", size = 3749 }
+
[[package]]
name = "prompt-toolkit"
version = "3.0.48"
@@ -1945,15 +2579,15 @@ wheels = [
[[package]]
name = "pydantic-settings"
-version = "2.6.0"
+version = "2.6.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pydantic" },
{ name = "python-dotenv" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/6c/66/5f1a9da10675bfb3b9da52f5b689c77e0a5612263fcce510cfac3e99a168/pydantic_settings-2.6.0.tar.gz", hash = "sha256:44a1804abffac9e6a30372bb45f6cafab945ef5af25e66b1c634c01dd39e0188", size = 75232 }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/d4/9dfbe238f45ad8b168f5c96ee49a3df0598ce18a0795a983b419949ce65b/pydantic_settings-2.6.1.tar.gz", hash = "sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0", size = 75646 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/34/19/26bb6bdb9fdad5f0dfce538780814084fb667b4bc37fcb28459c14b8d3b5/pydantic_settings-2.6.0-py3-none-any.whl", hash = "sha256:4a819166f119b74d7f8c765196b165f95cc7487ce58ea27dec8a5a26be0970e0", size = 28578 },
+ { url = "https://files.pythonhosted.org/packages/5e/f9/ff95fd7d760af42f647ea87f9b8a383d891cdb5e5dbd4613edaeb094252a/pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87", size = 28595 },
]
[[package]]
@@ -2097,6 +2731,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a4/62/02da182e544a51a5c3ccf4b03ab79df279f9c60c5e82d5e8bec7ca26ac11/python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8", size = 10051 },
]
+[[package]]
+name = "python-statemachine"
+version = "2.3.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/20/c9/7034a362ce151f9fa0ead5630727a16122f7a5ed235d42447910dff95b6a/python_statemachine-2.3.6.tar.gz", hash = "sha256:9cb4040ca7f2158d3cd46f36a77b420b6ef95a90223928a7f3cab232a70bd560", size = 36735 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/39/04/19a44b549cbaae1ac6c2acc58afb96b71209da866713877f40aab2f45de6/python_statemachine-2.3.6-py3-none-any.whl", hash = "sha256:0001b02cbe2f5b2420c423b5b3e3a33915447ac6d9735219c929e2378d454f5f", size = 41529 },
+]
+
[[package]]
name = "python-stdnum"
version = "1.20"
@@ -2272,58 +2915,58 @@ wheels = [
[[package]]
name = "rich"
-version = "13.9.2"
+version = "13.9.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markdown-it-py" },
{ name = "pygments" },
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/aa/9e/1784d15b057b0075e5136445aaea92d23955aad2c93eaede673718a40d95/rich-13.9.2.tar.gz", hash = "sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c", size = 222843 }
+sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/67/91/5474b84e505a6ccc295b2d322d90ff6aa0746745717839ee0c5fb4fdcceb/rich-13.9.2-py3-none-any.whl", hash = "sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1", size = 242117 },
+ { url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424 },
]
[[package]]
name = "rich-argparse"
-version = "1.5.2"
+version = "1.6.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "rich" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/26/f1/0a5ba24d684012e2f25deec88d9a9a1199d8e26e3bb595b812c8b0218cff/rich_argparse-1.5.2.tar.gz", hash = "sha256:84d348d5b6dafe99fffe2c7ea1ca0afe14096c921693445b9eee65ee4fcbfd2c", size = 17142 }
+sdist = { url = "https://files.pythonhosted.org/packages/7f/ee/c410251ff6123d4417f2fe8e72c8628f187682b70ce34134a2a3e307a2d5/rich_argparse-1.6.0.tar.gz", hash = "sha256:092083c30da186f25bcdff8b1d47fdfb571288510fb051e0488a72cc3128de13", size = 17499 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/77/83/4585bd18f0cda471ce44b8364620dc9cbb7ce7179b923123ad3feddf99da/rich_argparse-1.5.2-py3-none-any.whl", hash = "sha256:7027503d5849e27fc7cc85fb58504363606f2ec1c8b3c27d9a8ad28788faf877", size = 19777 },
+ { url = "https://files.pythonhosted.org/packages/25/45/54b95bb72bb17c27a7252bee5034955020b5869a33918b660ffc29cbf608/rich_argparse-1.6.0-py3-none-any.whl", hash = "sha256:fbe70a1d821b3f2fa8958cddf0cae131870a6e9faa04ab52b409cb1eda809bd7", size = 20072 },
]
[[package]]
name = "ruff"
-version = "0.7.0"
+version = "0.7.2"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/2c/c7/f3367d1da5d568192968c5c9e7f3d51fb317b9ac04828493b23d8fce8ce6/ruff-0.7.0.tar.gz", hash = "sha256:47a86360cf62d9cd53ebfb0b5eb0e882193fc191c6d717e8bef4462bc3b9ea2b", size = 3146645 }
+sdist = { url = "https://files.pythonhosted.org/packages/95/51/231bb3790e5b0b9fd4131f9a231d73d061b3667522e3f406fd9b63334d0e/ruff-0.7.2.tar.gz", hash = "sha256:2b14e77293380e475b4e3a7a368e14549288ed2931fce259a6f99978669e844f", size = 3210036 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/48/59/a0275a0913f3539498d116046dd679cd657fe3b7caf5afe1733319414932/ruff-0.7.0-py3-none-linux_armv6l.whl", hash = "sha256:0cdf20c2b6ff98e37df47b2b0bd3a34aaa155f59a11182c1303cce79be715628", size = 10434007 },
- { url = "https://files.pythonhosted.org/packages/cd/94/da0ba5f956d04c90dd899209904210600009dcda039ce840d83eb4298c7d/ruff-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:496494d350c7fdeb36ca4ef1c9f21d80d182423718782222c29b3e72b3512737", size = 10048066 },
- { url = "https://files.pythonhosted.org/packages/57/1d/e5cc149ecc46e4f203403a79ccd170fad52d316f98b87d0f63b1945567db/ruff-0.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:214b88498684e20b6b2b8852c01d50f0651f3cc6118dfa113b4def9f14faaf06", size = 9711389 },
- { url = "https://files.pythonhosted.org/packages/05/67/fb7ea2c869c539725a16c5bc294e9aa34f8b1b6fe702f1d173a5da517c2b/ruff-0.7.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630fce3fefe9844e91ea5bbf7ceadab4f9981f42b704fae011bb8efcaf5d84be", size = 10755174 },
- { url = "https://files.pythonhosted.org/packages/5f/f0/13703bc50536a0613ea3dce991116e5f0917a1f05528c6ab738b33c08d3f/ruff-0.7.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:211d877674e9373d4bb0f1c80f97a0201c61bcd1e9d045b6e9726adc42c156aa", size = 10196040 },
- { url = "https://files.pythonhosted.org/packages/99/c1/77b04ab20324ab03d333522ee55fb0f1c38e3ca0d326b4905f82ce6b6c70/ruff-0.7.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:194d6c46c98c73949a106425ed40a576f52291c12bc21399eb8f13a0f7073495", size = 11033684 },
- { url = "https://files.pythonhosted.org/packages/f2/97/f463334dc4efeea3551cd109163df15561c18a1c3ec13d51643740fd36ba/ruff-0.7.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:82c2579b82b9973a110fab281860403b397c08c403de92de19568f32f7178598", size = 11803700 },
- { url = "https://files.pythonhosted.org/packages/b4/f8/a31d40c4bb92933d376a53e7c5d0245d9b27841357e4820e96d38f54b480/ruff-0.7.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9af971fe85dcd5eaed8f585ddbc6bdbe8c217fb8fcf510ea6bca5bdfff56040e", size = 11347848 },
- { url = "https://files.pythonhosted.org/packages/83/62/0c133b35ddaf91c65c30a56718b80bdef36bfffc35684d29e3a4878e0ea3/ruff-0.7.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b641c7f16939b7d24b7bfc0be4102c56562a18281f84f635604e8a6989948914", size = 12480632 },
- { url = "https://files.pythonhosted.org/packages/46/96/464058dd1d980014fb5aa0a1254e78799efb3096fc7a4823cd66a1621276/ruff-0.7.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d71672336e46b34e0c90a790afeac8a31954fd42872c1f6adaea1dff76fd44f9", size = 10941919 },
- { url = "https://files.pythonhosted.org/packages/a0/f7/bda37ec77986a435dde44e1f59374aebf4282a5fa9cf17735315b847141f/ruff-0.7.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ab7d98c7eed355166f367597e513a6c82408df4181a937628dbec79abb2a1fe4", size = 10745519 },
- { url = "https://files.pythonhosted.org/packages/c2/33/5f77fc317027c057b61a848020a47442a1cbf12e592df0e41e21f4d0f3bd/ruff-0.7.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1eb54986f770f49edb14f71d33312d79e00e629a57387382200b1ef12d6a4ef9", size = 10284872 },
- { url = "https://files.pythonhosted.org/packages/ff/50/98aec292bc9537f640b8d031c55f3414bf15b6ed13b3e943fed75ac927b9/ruff-0.7.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:dc452ba6f2bb9cf8726a84aa877061a2462afe9ae0ea1d411c53d226661c601d", size = 10600334 },
- { url = "https://files.pythonhosted.org/packages/f2/85/12607ae3201423a179b8cfadc7cb1e57d02cd0135e45bd0445acb4cef327/ruff-0.7.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:4b406c2dce5be9bad59f2de26139a86017a517e6bcd2688da515481c05a2cb11", size = 11017333 },
- { url = "https://files.pythonhosted.org/packages/d4/7f/3b85a56879e705d5f46ec14daf8a439fca05c3081720fe3dc3209100922d/ruff-0.7.0-py3-none-win32.whl", hash = "sha256:f6c968509f767776f524a8430426539587d5ec5c662f6addb6aa25bc2e8195ec", size = 8570962 },
- { url = "https://files.pythonhosted.org/packages/39/9f/c5ee2b40d377354dabcc23cff47eb299de4b4d06d345068f8f8cc1eadac8/ruff-0.7.0-py3-none-win_amd64.whl", hash = "sha256:ff4aabfbaaba880e85d394603b9e75d32b0693152e16fa659a3064a85df7fce2", size = 9365544 },
- { url = "https://files.pythonhosted.org/packages/89/8b/ee1509f60148cecba644aa718f6633216784302458340311898aaf0b1bed/ruff-0.7.0-py3-none-win_arm64.whl", hash = "sha256:10842f69c245e78d6adec7e1db0a7d9ddc2fff0621d730e61657b64fa36f207e", size = 8695763 },
+ { url = "https://files.pythonhosted.org/packages/5c/56/0caa2b5745d66a39aa239c01059f6918fc76ed8380033d2f44bf297d141d/ruff-0.7.2-py3-none-linux_armv6l.whl", hash = "sha256:b73f873b5f52092e63ed540adefc3c36f1f803790ecf2590e1df8bf0a9f72cb8", size = 10373973 },
+ { url = "https://files.pythonhosted.org/packages/1a/33/cad6ff306731f335d481c50caa155b69a286d5b388e87ff234cd2a4b3557/ruff-0.7.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5b813ef26db1015953daf476202585512afd6a6862a02cde63f3bafb53d0b2d4", size = 10171140 },
+ { url = "https://files.pythonhosted.org/packages/97/f5/6a2ca5c9ba416226eac9cf8121a1baa6f06655431937e85f38ffcb9d0d01/ruff-0.7.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:853277dbd9675810c6826dad7a428d52a11760744508340e66bf46f8be9701d9", size = 9809333 },
+ { url = "https://files.pythonhosted.org/packages/16/83/e3e87f13d1a1dc205713632978cd7bc287a59b08bc95780dbe359b9aefcb/ruff-0.7.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21aae53ab1490a52bf4e3bf520c10ce120987b047c494cacf4edad0ba0888da2", size = 10622987 },
+ { url = "https://files.pythonhosted.org/packages/22/16/97ccab194480e99a2e3c77ae132b3eebfa38c2112747570c403a4a13ba3a/ruff-0.7.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ccc7e0fc6e0cb3168443eeadb6445285abaae75142ee22b2b72c27d790ab60ba", size = 10184640 },
+ { url = "https://files.pythonhosted.org/packages/97/1b/82ff05441b036f68817296c14f24da47c591cb27acfda473ee571a5651ac/ruff-0.7.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd77877a4e43b3a98e5ef4715ba3862105e299af0c48942cc6d51ba3d97dc859", size = 11210203 },
+ { url = "https://files.pythonhosted.org/packages/a6/96/7ecb30a7ef7f942e2d8e0287ad4c1957dddc6c5097af4978c27cfc334f97/ruff-0.7.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e00163fb897d35523c70d71a46fbaa43bf7bf9af0f4534c53ea5b96b2e03397b", size = 11870894 },
+ { url = "https://files.pythonhosted.org/packages/06/6a/c716bb126218227f8e604a9c484836257708a05ee3d2ebceb666ff3d3867/ruff-0.7.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3c54b538633482dc342e9b634d91168fe8cc56b30a4b4f99287f4e339103e88", size = 11449533 },
+ { url = "https://files.pythonhosted.org/packages/e6/2f/3a5f9f9478904e5ae9506ea699109070ead1e79aac041e872cbaad8a7458/ruff-0.7.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b792468e9804a204be221b14257566669d1db5c00d6bb335996e5cd7004ba80", size = 12607919 },
+ { url = "https://files.pythonhosted.org/packages/a0/57/4642e57484d80d274750dcc872ea66655bbd7e66e986fede31e1865b463d/ruff-0.7.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dba53ed84ac19ae4bfb4ea4bf0172550a2285fa27fbb13e3746f04c80f7fa088", size = 11016915 },
+ { url = "https://files.pythonhosted.org/packages/4d/6d/59be6680abee34c22296ae3f46b2a3b91662b8b18ab0bf388b5eb1355c97/ruff-0.7.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b19fafe261bf741bca2764c14cbb4ee1819b67adb63ebc2db6401dcd652e3748", size = 10625424 },
+ { url = "https://files.pythonhosted.org/packages/82/e7/f6a643683354c9bc7879d2f228ee0324fea66d253de49273a0814fba1927/ruff-0.7.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:28bd8220f4d8f79d590db9e2f6a0674f75ddbc3847277dd44ac1f8d30684b828", size = 10233692 },
+ { url = "https://files.pythonhosted.org/packages/d7/48/b4e02fc835cd7ed1ee7318d9c53e48bcf6b66301f55925a7dcb920e45532/ruff-0.7.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9fd67094e77efbea932e62b5d2483006154794040abb3a5072e659096415ae1e", size = 10751825 },
+ { url = "https://files.pythonhosted.org/packages/1e/06/6c5ee6ab7bb4cbad9e8bb9b2dd0d818c759c90c1c9e057c6ed70334b97f4/ruff-0.7.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:576305393998b7bd6c46018f8104ea3a9cb3fa7908c21d8580e3274a3b04b691", size = 11074811 },
+ { url = "https://files.pythonhosted.org/packages/a1/16/8969304f25bcd0e4af1778342e63b715e91db8a2dbb51807acd858cba915/ruff-0.7.2-py3-none-win32.whl", hash = "sha256:fa993cfc9f0ff11187e82de874dfc3611df80852540331bc85c75809c93253a8", size = 8650268 },
+ { url = "https://files.pythonhosted.org/packages/d9/18/c4b00d161def43fe5968e959039c8f6ce60dca762cec4a34e4e83a4210a0/ruff-0.7.2-py3-none-win_amd64.whl", hash = "sha256:dd8800cbe0254e06b8fec585e97554047fb82c894973f7ff18558eee33d1cb88", size = 9433693 },
+ { url = "https://files.pythonhosted.org/packages/7f/7b/c920673ac01c19814dd15fc617c02301c522f3d6812ca2024f4588ed4549/ruff-0.7.2-py3-none-win_arm64.whl", hash = "sha256:bb8368cd45bba3f57bb29cbb8d64b4a33f8415d0149d2655c5c8539452ce7760", size = 8735845 },
]

[[package]]
name = "service-identity"
-version = "24.1.0"
+version = "24.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
@@ -2331,18 +2974,18 @@ dependencies = [
{ name = "pyasn1" },
{ name = "pyasn1-modules" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/38/d2/2ac20fd05f1b6fce31986536da4caeac51ed2e1bb25d4a7d73ca4eccdfab/service_identity-24.1.0.tar.gz", hash = "sha256:6829c9d62fb832c2e1c435629b0a8c476e1929881f28bee4d20bc24161009221", size = 40183 }
+sdist = { url = "https://files.pythonhosted.org/packages/07/a5/dfc752b979067947261dbbf2543470c58efe735c3c1301dd870ef27830ee/service_identity-24.2.0.tar.gz", hash = "sha256:b8683ba13f0d39c6cd5d625d2c5f65421d6d707b013b375c355751557cbe8e09", size = 39245 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/3b/92/44669afe6354a7bed9968013862118c401690d8b5a805bab75ac1764845f/service_identity-24.1.0-py3-none-any.whl", hash = "sha256:a28caf8130c8a5c1c7a6f5293faaf239bbfb7751e4862436920ee6f2616f568a", size = 12037 },
+ { url = "https://files.pythonhosted.org/packages/08/2c/ca6dd598b384bc1ce581e24aaae0f2bed4ccac57749d5c3befbb5e742081/service_identity-24.2.0-py3-none-any.whl", hash = "sha256:6b047fbd8a84fd0bb0d55ebce4031e400562b9196e1e0d3e0fe2b8a59f6d4a85", size = 11364 },
]

[[package]]
name = "setuptools"
-version = "75.2.0"
+version = "75.3.0"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/07/37/b31be7e4b9f13b59cde9dcaeff112d401d49e0dc5b37ed4a9fc8fb12f409/setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec", size = 1350308 }
+sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/31/2d/90165d51ecd38f9a02c6832198c13a4e48652485e2ccf863ebb942c531b6/setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8", size = 1249825 },
+ { url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 },
]

[[package]]
@@ -2577,7 +3220,7 @@ wheels = [
[[package]]
name = "twisted"
-version = "24.7.0"
+version = "24.10.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "attrs" },
@@ -2588,9 +3231,9 @@ dependencies = [
{ name = "typing-extensions" },
{ name = "zope-interface" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/8b/bf/f30eb89bcd14a21a36b4cd3d96658432d4c590af3c24bbe08ea77fa7bbbb/twisted-24.7.0.tar.gz", hash = "sha256:5a60147f044187a127ec7da96d170d49bcce50c6fd36f594e60f4587eff4d394", size = 3516844 }
+sdist = { url = "https://files.pythonhosted.org/packages/b2/0f/2d0b0dcd52a849db64ff63619aead94ae1091fe4d4d7e100371efe513585/twisted-24.10.0.tar.gz", hash = "sha256:02951299672595fea0f70fa2d5f7b5e3d56836157eda68859a6ad6492d36756e", size = 3525999 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/49/d2/7b3e869b983fbf29d770fc2893f8df7c1739c6ff03a2b926b4fc43e4263e/twisted-24.7.0-py3-none-any.whl", hash = "sha256:734832ef98108136e222b5230075b1079dad8a3fc5637319615619a7725b0c81", size = 3181556 },
+ { url = "https://files.pythonhosted.org/packages/f9/7c/f80f6853d702782edb357190c42c3973f13c547a5f68ab1b17e6415061b8/twisted-24.10.0-py3-none-any.whl", hash = "sha256:67aa7c8aa94387385302acf44ade12967c747858c8bcce0f11d38077a11c5326", size = 3188753 },
]

[package.optional-dependencies]
@@ -2689,27 +3332,27 @@ wheels = [
[[package]]
name = "uv"
-version = "0.4.25"
+version = "0.4.29"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d0/bc/1a013408b7f9f437385705652f404b6b15127ecf108327d13be493bdfb81/uv-0.4.25.tar.gz", hash = "sha256:d39077cdfe3246885fcdf32e7066ae731a166101d063629f9cea08738f79e6a3", size = 2064863 }
+sdist = { url = "https://files.pythonhosted.org/packages/6a/23/6e8d8177112b40d4905a49c03d397c5b93eb030f87cdddf0c5d4be599fc9/uv-0.4.29.tar.gz", hash = "sha256:9c559b6fdc042add463e86afa1c210716f7020bfc2e96b00df5af7afcb587ce7", size = 2102901 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/84/18/9c9056d373620b1cf5182ce9b2d258e86d117d667cf8883e12870f2a5edf/uv-0.4.25-py3-none-linux_armv6l.whl", hash = "sha256:94fb2b454afa6bdfeeea4b4581c878944ca9cf3a13712e6762f245f5fbaaf952", size = 13028246 },
- { url = "https://files.pythonhosted.org/packages/a1/19/8a3f09aba30ac5433dfecde55d5241a07c96bb12340c3b810bc58188a12e/uv-0.4.25-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a7c3a18c20ddb527d296d1222bddf42b78031c50b5b4609d426569b5fb61f5b0", size = 13175265 },
- { url = "https://files.pythonhosted.org/packages/e8/c9/2f924bb29bd53c51b839c1c6126bd2cf4c451d4a7d8f34be078f9e31c57e/uv-0.4.25-py3-none-macosx_11_0_arm64.whl", hash = "sha256:18100f0f36419a154306ed6211e3490bf18384cdf3f1a0950848bf64b62fa251", size = 12255610 },
- { url = "https://files.pythonhosted.org/packages/b2/5a/d8f8971aeb3389679505cf633a786cd72a96ce232f80f14cfe5a693b4c64/uv-0.4.25-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:6e981b1465e30102e41946adede9cb08051a5d70c6daf09f91a7ea84f0b75c08", size = 12506511 },
- { url = "https://files.pythonhosted.org/packages/e3/96/8c73520daeba5022cec8749e44afd4ca9ef774bf728af9c258bddec3577f/uv-0.4.25-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:578ae385fad6bd6f3868828e33d54994c716b315b1bc49106ec1f54c640837e4", size = 12836250 },
- { url = "https://files.pythonhosted.org/packages/67/3d/b0e810d365fb154fe1d380a0f43ee35a683cf9162f2501396d711bec2621/uv-0.4.25-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d29a78f011ecc2f31c13605acb6574c2894c06d258b0f8d0dbb899986800450", size = 13521303 },
- { url = "https://files.pythonhosted.org/packages/2d/f4/dd3830ec7fc6e7e5237c184f30f2dbfed4f93605e472147eca1373bcc72b/uv-0.4.25-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ec181be2bda10651a3558156409ac481549983e0276d0e3645e3b1464e7f8715", size = 14105308 },
- { url = "https://files.pythonhosted.org/packages/f4/4e/0fca02f8681e4870beda172552e747e0424f6e9186546b00a5e92525fea9/uv-0.4.25-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50c7d0d9e7f392f81b13bf3b7e37768d1486f2fc9d533a54982aa0ed11e4db23", size = 13859475 },
- { url = "https://files.pythonhosted.org/packages/33/07/1100e9bc652f2850930f466869515d16ffe9582aaaaa99bac332ebdfe3ea/uv-0.4.25-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2fc35b5273f1e018aecd66b70e0fd7d2eb6698853dde3e2fc644e7ebf9f825b1", size = 18100840 },
- { url = "https://files.pythonhosted.org/packages/fa/98/ba1cb7dd2aa639a064a9e49721e08f12a3424456d60dde1327e7c6437930/uv-0.4.25-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7022a71ff63a3838796f40e954b76bf7820fc27e96fe002c537e75ff8e34f1d", size = 13645464 },
- { url = "https://files.pythonhosted.org/packages/0d/05/b97fb8c828a070e8291826922b2712d1146b11563b4860bc9ba80f5635d1/uv-0.4.25-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:e02afb0f6d4b58718347f7d7cfa5a801e985ce42181ba971ed85ef149f6658ca", size = 12694995 },
- { url = "https://files.pythonhosted.org/packages/b3/97/63df050811379130202898f60e735a1a331ba3a93b8aa1e9bb466f533913/uv-0.4.25-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:3d7680795ea78cdbabbcce73d039b2651cf1fa635ddc1aa3082660f6d6255c50", size = 12831737 },
- { url = "https://files.pythonhosted.org/packages/dc/e0/08352dcffa6e8435328861ea60b2c05e8bd030f1e93998443ba66209db7b/uv-0.4.25-py3-none-musllinux_1_1_i686.whl", hash = "sha256:aae9dcafd20d5ba978c8a4939ab942e8e2e155c109e9945207fbbd81d2892c9e", size = 13273529 },
- { url = "https://files.pythonhosted.org/packages/25/f4/eaf95e5eee4e2e69884df0953d094deae07216f72068ef1df08c0f49841d/uv-0.4.25-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:4c55040e67470f2b73e95e432aba06f103a0b348ea0b9c6689b1029c8d9e89fd", size = 15039860 },
- { url = "https://files.pythonhosted.org/packages/69/04/482b1cc9e8d599c7d766c4ba2d7a512ed3989921443792f92f26b8d44fe6/uv-0.4.25-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:bdbfd0c476b9e80a3f89af96aed6dd7d2782646311317a9c72614ccce99bb2ad", size = 13776302 },
- { url = "https://files.pythonhosted.org/packages/cd/7e/3d1cb735cc3df6341ac884b73eeec1f51a29192721be40be8e9b1d82666d/uv-0.4.25-py3-none-win32.whl", hash = "sha256:7d266e02fefef930609328c31c075084295c3cb472bab3f69549fad4fd9d82b3", size = 12970553 },
- { url = "https://files.pythonhosted.org/packages/04/e9/c00d2bb4a286b13fad0f06488ea9cbe9e76d0efcd81e7a907f72195d5b83/uv-0.4.25-py3-none-win_amd64.whl", hash = "sha256:be2a4fc4fcade9ea5e67e51738c95644360d6e59b6394b74fc579fb617f902f7", size = 14702875 },
+ { url = "https://files.pythonhosted.org/packages/1c/8d/78b6927a3e511a4bc05347714c8917896477537bf09a6301e84de08b7a59/uv-0.4.29-py3-none-linux_armv6l.whl", hash = "sha256:287dc3fd3f78093a5a82136f01cbd9f224e0905b38d3dcffdc96c08fbbe48ee9", size = 13250618 },
+ { url = "https://files.pythonhosted.org/packages/d8/2f/1bbfc3c15933fcf07c222e063044696320f5a9fe3d5c584960ed0c490cf8/uv-0.4.29-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6b03859068aaa08ca9907a51d403d54b0a9d8054091646845a9192f213f099d4", size = 13316211 },
+ { url = "https://files.pythonhosted.org/packages/fb/1a/1c862cc36f29cf58b22758f31eb5f9611ee86429d470c8e4c0fd235592ec/uv-0.4.29-py3-none-macosx_11_0_arm64.whl", hash = "sha256:950bbfe1954e9c3a5d6c4777bb778b4c23d0dea9ad9f77622c45d4fbba433355", size = 12363705 },
+ { url = "https://files.pythonhosted.org/packages/a1/0e/76e947db1135fa2436b11cc1ca927de187601be7ec65b0102f42a6a58211/uv-0.4.29-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:3473b05142ba436ac30d036b7ab5e9bcfa97f63df5d1382f92e0a3e4aaa391bc", size = 12622825 },
+ { url = "https://files.pythonhosted.org/packages/41/3d/b54226b11eb935e4e57585905cf3ded2ac7d972c551bef1c3a000d4c5e47/uv-0.4.29-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7060dfbad0bc26e9cecbb4f8482445c958071511f23728948478f81acfb29048", size = 13054445 },
+ { url = "https://files.pythonhosted.org/packages/bf/00/02fa712a3991957d2a65d043173d06d3a429acb3c4e54976f4385c034d97/uv-0.4.29-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df35d9cbe4cfbb7bce287f56e3bb7a7cef0b7b5173ed889d936d4c470f2b1b83", size = 13655646 },
+ { url = "https://files.pythonhosted.org/packages/61/85/f6796032396bbd350648747c984376c8c8add14c75476ed8d5a3438a9c76/uv-0.4.29-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:cfb797a87b55d96cc0593e9f29ab5d58454be74598ea0158e1b2f4f2dc97cede", size = 14281147 },
+ { url = "https://files.pythonhosted.org/packages/17/48/3314a03c6580d0b05bd1b9122ff9a9fbde5163815cf84f5a220fc013cea1/uv-0.4.29-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:668d3e6095c6f0cac6a831ef4030f7ad79442d1c84b9569f01f50b60c2d51a77", size = 14004714 },
+ { url = "https://files.pythonhosted.org/packages/11/e0/456bc5271f09ff385c57570628705757a59f9a3f8205ff029dc9b2213dbd/uv-0.4.29-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0be21afa0e582ddc5badff6ef40c3c6784efc5feae4ad568307b668d40dc49bd", size = 18032241 },
+ { url = "https://files.pythonhosted.org/packages/ef/6c/db10ff7f178ee93a832941e1cddbf38bfb1b0e30fd07580db10eb909f19d/uv-0.4.29-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6224a322267570e0470c61008fd1c8e2f50bf073b339f4c3010da86aef3c44c", size = 13787528 },
+ { url = "https://files.pythonhosted.org/packages/1b/cf/501cd6aeeae0413e83ed0c112a362e44c05fa01144ecfd05c6fb3533778d/uv-0.4.29-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:24cccff9c248864ba0ab3429bae56314146c9494ce66a881d70ea8cf2805945f", size = 12789635 },
+ { url = "https://files.pythonhosted.org/packages/8d/8d/3103af713c6369b6c1afe2bd8415eb43ea2cd4d11aa823f2e5747736b410/uv-0.4.29-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:68d4967b5f0af8bd46085e0f3ded229026700668a97734a21c3d11a5fc350c47", size = 13022589 },
+ { url = "https://files.pythonhosted.org/packages/4f/4d/e9a0da7c43301f27503ed0af881afb9059e3700bd374d1c7c6579ff9fb29/uv-0.4.29-py3-none-musllinux_1_1_i686.whl", hash = "sha256:75927da78f74bb935314d236dc61ecdc192e878e06eb79585b6d9d5ee9829f98", size = 13367805 },
+ { url = "https://files.pythonhosted.org/packages/be/70/a78cd7cdac7581cf0a7e027cf3c69d07ca5b6b83d39f571411cc73f1590f/uv-0.4.29-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:246da468ac0d51e7fb257cd038db2f8d6376ae269a44d01f56776e32108aa9da", size = 15158094 },
+ { url = "https://files.pythonhosted.org/packages/e6/93/3bcb18a54a9823c8bfadd362022b1c480da10c0bcd86398101f9a124e0a7/uv-0.4.29-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:8c71663c7df4f512c697de39a4926dc191897f5fede73644bb2329f532c1ebfa", size = 13917229 },
+ { url = "https://files.pythonhosted.org/packages/8a/38/bd90e265f868ddbc2dd3cc9625e2d8670d3ac35984a078491be11be754f3/uv-0.4.29-py3-none-win32.whl", hash = "sha256:b5775db128b98251c3ea7874367fc20dce9f9aac3dbfa635e3ef4a1c56842d9c", size = 13203439 },
+ { url = "https://files.pythonhosted.org/packages/cb/4f/446a0fe5901b110093f3888e93c8ebee1b08f35ba1699bbaf3645b553865/uv-0.4.29-py3-none-win_amd64.whl", hash = "sha256:67dcfd253020e25ed1c49e5bd06406205c37264f99e14002de53a357cd1cdadf", size = 14902665 },
]

[[package]]
@@ -2912,7 +3555,7 @@ wheels = [
[[package]]
name = "yt-dlp"
-version = "2024.10.7"
+version = "2024.10.22"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "brotli", marker = "implementation_name == 'cpython'" },
@@ -2924,9 +3567,9 @@ dependencies = [
{ name = "urllib3" },
{ name = "websockets" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/2e/b1/08679efb4c1932dc6420deda8a89f03d7440d6462b7f61d339db2732a497/yt_dlp-2024.10.7.tar.gz", hash = "sha256:0baf1ab517c9748d7e337ced91c5543c36fc16246a9ebedac32ebf20c1998ceb", size = 2877443 }
+sdist = { url = "https://files.pythonhosted.org/packages/2f/79/acfe1c2bf64ed83e1b465e6550c0f5bc2214ea447a900b102f5ca6e4186e/yt_dlp-2024.10.22.tar.gz", hash = "sha256:47b82a1fd22411b5c95ef2f0a1ae1af4e6dfd736ea99fdb2a0ea41445abc62ba", size = 2885622 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/6e/91/ecb07d66110334cdb01e94b187577af3b041897090203c9957728825d46f/yt_dlp-2024.10.7-py3-none-any.whl", hash = "sha256:9e336ae663bfd7ad3ea1c02e722747388172719efc0fc39a807dace3073aa704", size = 3149082 },
+ { url = "https://files.pythonhosted.org/packages/bb/68/548f9819b41d53561d4f3d39588111cf39993c066b6e5300b4ae118eb2e6/yt_dlp-2024.10.22-py3-none-any.whl", hash = "sha256:ba166602ebe22a220e4dc1ead45bf00eb469ed812b22f4fb8bb54734f9b02084", size = 3155189 },
]

[[package]]
@@ -2940,35 +3583,35 @@ wheels = [
[[package]]
name = "zope-interface"
-version = "7.1.0"
+version = "7.1.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "setuptools" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/e4/1f/8bb0739aba9a8909bcfa2e12dc20443ebd5bd773b6796603f1a126211e18/zope_interface-7.1.0.tar.gz", hash = "sha256:3f005869a1a05e368965adb2075f97f8ee9a26c61898a9e52a9764d93774f237", size = 300239 }
+sdist = { url = "https://files.pythonhosted.org/packages/3c/f5/1079cab32302359cc09bd1dca9656e680601e0e8af9397322ab0fe85f368/zope.interface-7.1.1.tar.gz", hash = "sha256:4284d664ef0ff7b709836d4de7b13d80873dc5faeffc073abdb280058bfac5e3", size = 253129 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/52/cf/6fe78d1748ade8bde9e0afa0b7a6dc53427fa817c44c0c67937f4a3890ca/zope.interface-7.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2bd9e9f366a5df08ebbdc159f8224904c1c5ce63893984abb76954e6fbe4381a", size = 207992 },
- { url = "https://files.pythonhosted.org/packages/98/6a/7583a3bf0ba508d7454b69928ced99f516af674be7a2781d681bbdf3e439/zope.interface-7.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:661d5df403cd3c5b8699ac480fa7f58047a3253b029db690efa0c3cf209993ef", size = 208498 },
- { url = "https://files.pythonhosted.org/packages/f2/d7/acae0a46ff4494ade2478335aeb2dec2ec024b7761915b82887cb04f207d/zope.interface-7.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91b6c30689cfd87c8f264acb2fc16ad6b3c72caba2aec1bf189314cf1a84ca33", size = 254730 },
- { url = "https://files.pythonhosted.org/packages/76/78/42201e0e6150a14d6aaf138f969186a89ec31d25a5860b7c054191cfefa6/zope.interface-7.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b6a4924f5bad9fe21d99f66a07da60d75696a136162427951ec3cb223a5570d", size = 249135 },
- { url = "https://files.pythonhosted.org/packages/3f/1e/a2bb69085db973bc936493e1a870c708b4e61496c4c1f04033a9aeb2dcce/zope.interface-7.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80a3c00b35f6170be5454b45abe2719ea65919a2f09e8a6e7b1362312a872cd3", size = 254254 },
- { url = "https://files.pythonhosted.org/packages/4f/cf/a5cb40b19f52c100d0ce22797f63ac865ced81fbf3a75a7ae0ecf2c45810/zope.interface-7.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b936d61dbe29572fd2cfe13e30b925e5383bed1aba867692670f5a2a2eb7b4e9", size = 211705 },
- { url = "https://files.pythonhosted.org/packages/9a/0b/c9dd45c073109fcaa63d5e167cae9e364fcb25f3626350127258a678ff0f/zope.interface-7.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ac20581fc6cd7c754f6dff0ae06fedb060fa0e9ea6309d8be8b2701d9ea51c4", size = 208524 },
- { url = "https://files.pythonhosted.org/packages/e0/34/57afb328bcced4d0472c11cfab5581cc1e6bb91adf1bb87509a4f5690755/zope.interface-7.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:848b6fa92d7c8143646e64124ed46818a0049a24ecc517958c520081fd147685", size = 209032 },
- { url = "https://files.pythonhosted.org/packages/e9/a4/b2e4900f6d4a572979b5e8aa95f1ff9296b458978537f51ba546da51c108/zope.interface-7.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec1ef1fdb6f014d5886b97e52b16d0f852364f447d2ab0f0c6027765777b6667", size = 261251 },
- { url = "https://files.pythonhosted.org/packages/c3/89/2cd0a6b24819c024b340fa67f0dda65d0ac8bbd81f35a1fa7c468b681d55/zope.interface-7.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bcff5c09d0215f42ba64b49205a278e44413d9bf9fa688fd9e42bfe472b5f4f", size = 255366 },
- { url = "https://files.pythonhosted.org/packages/9e/00/e58be3067025ffbeed48094a07c1972d8150f6d628151fde66f16fa0d4ae/zope.interface-7.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07add15de0cc7e69917f7d286b64d54125c950aeb43efed7a5ea7172f000fbc1", size = 260078 },
- { url = "https://files.pythonhosted.org/packages/d1/b6/56436f9f6b74c13c9cd3dbd8345f47823d72b7c9ba2b39872cb7bee4cf42/zope.interface-7.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:9940d5bc441f887c5f375ec62bcf7e7e495a2d5b1da97de1184a88fb567f06af", size = 212092 },
- { url = "https://files.pythonhosted.org/packages/ee/d7/0ab8291230cf4fa05fa6f7bb26e0206d799a922070bc3a102f88133edc1e/zope.interface-7.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f245d039f72e6f802902375755846f5de1ee1e14c3e8736c078565599bcab621", size = 208649 },
- { url = "https://files.pythonhosted.org/packages/4e/ce/598d623faeca8a7ccb120a7d94f707efb61d21a57324a905c9a2bdb7b4b9/zope.interface-7.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6159e767d224d8f18deff634a1d3722e68d27488c357f62ebeb5f3e2f5288b1f", size = 209053 },
- { url = "https://files.pythonhosted.org/packages/ea/d0/c88caffdf6cf99e9b5d1fad9bdfa94d9eee21f72c2f9f4768bced100aab7/zope.interface-7.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e956b1fd7f3448dd5e00f273072e73e50dfafcb35e4227e6d5af208075593c9", size = 266506 },
- { url = "https://files.pythonhosted.org/packages/1d/bd/2b665bb66b18169828f0e3d0865eabdb3c8f59556db90367950edccfc072/zope.interface-7.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff115ef91c0eeac69cd92daeba36a9d8e14daee445b504eeea2b1c0b55821984", size = 261229 },
- { url = "https://files.pythonhosted.org/packages/04/a0/9a0595057002784395990b5e5a5e84e71905f5c110ea5ecae469dc831468/zope.interface-7.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec001798ab62c3fc5447162bf48496ae9fba02edc295a9e10a0b0c639a6452e", size = 267167 },
- { url = "https://files.pythonhosted.org/packages/fb/64/cf1a22aad65dc9746fdc6705042c066011e3fe80f9c73aea9a53b0b3642d/zope.interface-7.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:124149e2d42067b9c6597f4dafdc7a0983d0163868f897b7bb5dc850b14f9a87", size = 212207 },
- { url = "https://files.pythonhosted.org/packages/43/39/75d4e59474ec7aeb8eebb01fae88e97ee8b0b3144d7a445679f000001977/zope.interface-7.1.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:9733a9a0f94ef53d7aa64661811b20875b5bc6039034c6e42fb9732170130573", size = 208650 },
- { url = "https://files.pythonhosted.org/packages/c9/24/929b5530508a39a842fe50e159681b3dd36800604252940662268c3a8551/zope.interface-7.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5fcf379b875c610b5a41bc8a891841533f98de0520287d7f85e25386cd10d3e9", size = 209057 },
- { url = "https://files.pythonhosted.org/packages/fa/a3/07c120b40d47a3b28faadbacea579db8d7dc9214c909da13d72fd55395f7/zope.interface-7.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0a45b5af9f72c805ee668d1479480ca85169312211bed6ed18c343e39307d5f", size = 266466 },
- { url = "https://files.pythonhosted.org/packages/4f/fa/e1925c8737787887a2801a45aadbc1ca8367fd9f135e721a2ce5a020e14d/zope.interface-7.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4af4a12b459a273b0b34679a5c3dc5e34c1847c3dd14a628aa0668e19e638ea2", size = 261220 },
- { url = "https://files.pythonhosted.org/packages/d5/79/d7828b915edf77f8f7849e0ab4380084d07c3d09ef86f9763f1490661d66/zope.interface-7.1.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a735f82d2e3ed47ca01a20dfc4c779b966b16352650a8036ab3955aad151ed8a", size = 267157 },
- { url = "https://files.pythonhosted.org/packages/98/ac/012f18dc9b35e8547975f6e0512bcb6a1e97901d7a5e4e4cb5899dee6304/zope.interface-7.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:5501e772aff595e3c54266bc1bfc5858e8f38974ce413a8f1044aae0f32a83a3", size = 212213 },
+ { url = "https://files.pythonhosted.org/packages/33/41/328372febe88b50cb1c77d99fd3ee8e628fb125bd26b38b5351f8b9bdcbb/zope.interface-7.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6650bd56ef350d37c8baccfd3ee8a0483ed6f8666e641e4b9ae1a1827b79f9e5", size = 208001 },
+ { url = "https://files.pythonhosted.org/packages/22/06/ced7336eeabba528a39803ccdf52200daa4e7b73d74feac52677f7c83a72/zope.interface-7.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84e87eba6b77a3af187bae82d8de1a7c208c2a04ec9f6bd444fd091b811ad92e", size = 208518 },
+ { url = "https://files.pythonhosted.org/packages/9a/c9/3a63c758a68739080d8c343dda2fca4d214096ed97ce56b875086b309dd2/zope.interface-7.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c4e1b4c06d9abd1037c088dae1566c85f344a3e6ae4350744c3f7f7259d9c67", size = 254689 },
+ { url = "https://files.pythonhosted.org/packages/9a/59/d8c59cfb16b3f086c868d0c531892c3914acbbb324005f0e5c640855a596/zope.interface-7.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cd5e3d910ac87652a09f6e5db8e41bc3b49cf08ddd2d73d30afc644801492cd", size = 249133 },
+ { url = "https://files.pythonhosted.org/packages/9a/6e/449acdd6530cbb9c224be3e59b032d8fc6db35ea8b398aaabcaee50f3881/zope.interface-7.1.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca95594d936ee349620900be5b46c0122a1ff6ce42d7d5cb2cf09dc84071ef16", size = 254250 },
+ { url = "https://files.pythonhosted.org/packages/76/cb/8a13047ae686ca0a478cbf9043132acdcc8ccf71cfa0af287de235fd54f4/zope.interface-7.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:ad339509dcfbbc99bf8e147db6686249c4032f26586699ec4c82f6e5909c9fe2", size = 211708 },
+ { url = "https://files.pythonhosted.org/packages/cc/9e/a53e0b252dca6f4858765efd4287239542e3018efe403ccf4f4947b1f6a8/zope.interface-7.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e59f175e868f856a77c0a77ba001385c377df2104fdbda6b9f99456a01e102a", size = 208535 },
+ { url = "https://files.pythonhosted.org/packages/4a/2c/19bb3ead6133fe457e833af67cc8ce497f54bfd90f5ac532af6e4892acb2/zope.interface-7.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0de23bcb93401994ea00bc5c677ef06d420340ac0a4e9c10d80e047b9ce5af3f", size = 209053 },
+ { url = "https://files.pythonhosted.org/packages/18/3f/3b341ed342f594f3b9e3fc48acecd929d118ee1ea6e415cedfebc2b78214/zope.interface-7.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cdb7e7e5524b76d3ec037c1d81a9e2c7457b240fd4cb0a2476b65c3a5a6c81f", size = 260764 },
+ { url = "https://files.pythonhosted.org/packages/65/2a/bb8f72d938cf4edf7e40cbdf14477242a3753205c4f537dafdfbb33249e5/zope.interface-7.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3603ef82a9920bd0bfb505423cb7e937498ad971ad5a6141841e8f76d2fd5446", size = 254805 },
+ { url = "https://files.pythonhosted.org/packages/b1/60/abc01b59a41762cf785be8e997a7301e3cb93d19e066a35f10fb31ac0277/zope.interface-7.1.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1d52d052355e0c5c89e0630dd2ff7c0b823fd5f56286a663e92444761b35e25", size = 259573 },
+ { url = "https://files.pythonhosted.org/packages/19/50/52a20a6a9e7c605eabb87dcdd5823369d3096854c41b968f2d1e18a8ae8f/zope.interface-7.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:179ad46ece518c9084cb272e4a69d266b659f7f8f48e51706746c2d8a426433e", size = 212067 },
+ { url = "https://files.pythonhosted.org/packages/0f/fe/52bd130dd3f8b88868e741cf9bfeea4367e13d3f84933746f4ba01c85e6b/zope.interface-7.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e6503534b52bb1720ace9366ee30838a58a3413d3e197512f3338c8f34b5d89d", size = 208716 },
+ { url = "https://files.pythonhosted.org/packages/8b/a9/51fe239b07f69384e77568ca3098c518926204eb1fdc7cdcc154c0c78521/zope.interface-7.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f85b290e5b8b11814efb0d004d8ce6c9a483c35c462e8d9bf84abb93e79fa770", size = 209115 },
+ { url = "https://files.pythonhosted.org/packages/f0/fe/33f1f1e68d54c9563db436596a648e57c9dfc298dc0525d348cdb5e812d0/zope.interface-7.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d029fac6a80edae80f79c37e5e3abfa92968fe921886139b3ee470a1b177321a", size = 264001 },
+ { url = "https://files.pythonhosted.org/packages/2e/7f/4d6dafc4debe955a72dd33f8cae1d2e522d43b42167ee8735fd0fe36961e/zope.interface-7.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5836b8fb044c6e75ba34dfaabc602493019eadfa0faf6ff25f4c4c356a71a853", size = 259018 },
+ { url = "https://files.pythonhosted.org/packages/7d/3f/3180bbd9937a2889a67ad2515e56869e0cdb1f47a1f0da52dc1065c81ff8/zope.interface-7.1.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7395f13533318f150ee72adb55b29284b16e73b6d5f02ab21f173b3e83f242b8", size = 264470 },
+ { url = "https://files.pythonhosted.org/packages/95/b8/46a52bfec80089d7e687c1e4471c5918e3a60c2dfff63d3e5588e4bd6656/zope.interface-7.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:1d0e23c6b746eb8ce04573cc47bcac60961ac138885d207bd6f57e27a1431ae8", size = 212226 },
+ { url = "https://files.pythonhosted.org/packages/7e/78/60fb41f6fca56f90a107244e28768deac8697de8cc0f7c8469725c9949ad/zope.interface-7.1.1-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:9fad9bd5502221ab179f13ea251cb30eef7cf65023156967f86673aff54b53a0", size = 208720 },
+ { url = "https://files.pythonhosted.org/packages/a5/4b/9152d924be141a1b52700ec0bb5c9a28795f67f4253dadb7f4c0c6d63675/zope.interface-7.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:55c373becbd36a44d0c9be1d5271422fdaa8562d158fb44b4192297b3c67096c", size = 209114 },
+ { url = "https://files.pythonhosted.org/packages/00/cc/23d6d94db158b31b82e92202d3e8938d5e5cb38e3141af823a34bd8ae511/zope.interface-7.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed1df8cc01dd1e3970666a7370b8bfc7457371c58ba88c57bd5bca17ab198053", size = 263960 },
+ { url = "https://files.pythonhosted.org/packages/e7/d6/acd466c950688ed8964ade5f9c5f2c035a52b44f18f19a6d79d3de48a255/zope.interface-7.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99c14f0727c978639139e6cad7a60e82b7720922678d75aacb90cf4ef74a068c", size = 259004 },
+ { url = "https://files.pythonhosted.org/packages/71/31/44b746ed39134fa9c28262dc8ff9821c6b6f4df5a9edc1e599219d16cb79/zope.interface-7.1.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b1eed7670d564f1025d7cda89f99f216c30210e42e95de466135be0b4a499d9", size = 264463 },
+ { url = "https://files.pythonhosted.org/packages/5a/e1/30fb5f7e587e14a57c8f41413cb76eecbcfd878ef105eb908d2d2e648b73/zope.interface-7.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:3defc925c4b22ac1272d544a49c6ba04c3eefcce3200319ee1be03d9270306dd", size = 212236 },
]