diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 79cc28e7..b1153211 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -102,7 +102,7 @@ jobs: # TODO: remove this exception for windows once we get tests passing on that platform if: ${{ !contains(matrix.os, 'windows') }} run: | - python -m pytest -s --basetemp=tests/out --ignore=archivebox/vendor --ignore=deb_dist --ignore=pip_dist --ignore=brew_dist + python -m pytest -s --basetemp=tests/out --ignore=archivebox/pkgs docker_tests: runs-on: ubuntu-latest diff --git a/.gitmodules b/.gitmodules index db744b8a..e260fdf5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,9 +1,3 @@ [submodule "docs"] path = docs url = https://github.com/ArchiveBox/ArchiveBox.wiki.git -[submodule "archivebox/vendor/pocket"] - path = archivebox/vendor/pocket - url = https://github.com/tapanpandita/pocket -[submodule "archivebox/vendor/pydantic-pkgr"] - path = archivebox/vendor/pydantic-pkgr - url = https://github.com/ArchiveBox/pydantic-pkgr diff --git a/archivebox/.flake8 b/archivebox/.flake8 index 01af646d..bb7176bd 100644 --- a/archivebox/.flake8 +++ b/archivebox/.flake8 @@ -3,4 +3,4 @@ ignore = D100,D101,D102,D103,D104,D105,D202,D203,D205,D400,E131,E241,E252,E266,E select = F,E9,W max-line-length = 130 max-complexity = 10 -exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv +exclude = migrations,tests,node_modules,vendor,venv,.venv,.venv2,.docker-venv,data,data* diff --git a/archivebox/__init__.py b/archivebox/__init__.py index bb2a9806..fd32778c 100755 --- a/archivebox/__init__.py +++ b/archivebox/__init__.py @@ -13,8 +13,8 @@ __package__ = 'archivebox' import os import sys - from pathlib import Path +from typing import cast ASCII_LOGO = """ █████╗ ██████╗ ██████╗██╗ ██╗██╗██╗ ██╗███████╗ ██████╗ ██████╗ ██╗ ██╗ @@ -47,11 +47,54 @@ from .monkey_patches import * # noqa # print('LOADING VENDORED LIBRARIES') -from .vendor import load_vendored_libs # noqa -load_vendored_libs() +from .pkgs import load_vendored_pkgs # noqa +load_vendored_pkgs() # print('DONE LOADING VENDORED LIBRARIES') +# Load ABX Plugin Specifications + Default Implementations +import abx # noqa +import abx_spec_archivebox # noqa +import abx_spec_config # noqa +import abx_spec_pydantic_pkgr # noqa +import abx_spec_django # noqa +import abx_spec_searchbackend # noqa +abx.pm.add_hookspecs(abx_spec_config.PLUGIN_SPEC) +abx.pm.register(abx_spec_config.PLUGIN_SPEC()) + +abx.pm.add_hookspecs(abx_spec_pydantic_pkgr.PLUGIN_SPEC) +abx.pm.register(abx_spec_pydantic_pkgr.PLUGIN_SPEC()) + +abx.pm.add_hookspecs(abx_spec_django.PLUGIN_SPEC) +abx.pm.register(abx_spec_django.PLUGIN_SPEC()) + +abx.pm.add_hookspecs(abx_spec_searchbackend.PLUGIN_SPEC) +abx.pm.register(abx_spec_searchbackend.PLUGIN_SPEC()) + +# Cast to ArchiveBoxPluginSpec to enable static type checking of pm.hook.call() methods +abx.pm = cast(abx.ABXPluginManager[abx_spec_archivebox.ArchiveBoxPluginSpec], abx.pm) +pm = abx.pm + + +# Load all pip-installed ABX-compatible plugins +ABX_ECOSYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx') + +# Load all built-in ArchiveBox plugins +ARCHIVEBOX_BUILTIN_PLUGINS = { + 'config': PACKAGE_DIR / 'config', + 'core': PACKAGE_DIR / 'core', + # 'search': PACKAGE_DIR / 'search', + # 'core': PACKAGE_DIR / 'core', +} + +# Load all user-defined ArchiveBox plugins +USER_PLUGINS = abx.find_plugins_in_dir(Path(os.getcwd()) / 'user_plugins') + +# Import all plugins and register them with ABX Plugin Manager +ALL_PLUGINS = 
{**ABX_ECOSYSTEM_PLUGINS, **ARCHIVEBOX_BUILTIN_PLUGINS, **USER_PLUGINS} +LOADED_PLUGINS = abx.load_plugins(ALL_PLUGINS) + +# Setup basic config, constants, paths, and version from .config.constants import CONSTANTS # noqa from .config.paths import PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa from .config.version import VERSION # noqa diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py index b1b43c7d..5a9ab109 100644 --- a/archivebox/abid_utils/models.py +++ b/archivebox/abid_utils/models.py @@ -175,7 +175,7 @@ class ABIDModel(models.Model): 'uri': self.abid_uri_src, 'subtype': self.abid_subtype_src, 'rand': self.abid_rand_src, - 'salt': 'self.abid_salt', # defined as static class vars at build time + 'salt': 'self.abid_salt', # defined as static class vars at build time } @property diff --git a/archivebox/abx/__init__.py b/archivebox/abx/__init__.py deleted file mode 100644 index c571a2e3..00000000 --- a/archivebox/abx/__init__.py +++ /dev/null @@ -1,131 +0,0 @@ -__package__ = 'abx' - -import importlib -from pathlib import Path -from typing import Dict, Callable, List - -from . import hookspec as base_spec -from abx.hookspec import hookimpl, hookspec # noqa -from abx.manager import pm, PluginManager # noqa - - -pm.add_hookspecs(base_spec) - - -###### PLUGIN DISCOVERY AND LOADING ######################################################## - -def get_plugin_order(plugin_entrypoint: Path): - order = 999 - try: - # if .plugin_order file exists, use it to set the load priority - order = int((plugin_entrypoint.parent / '.plugin_order').read_text()) - except FileNotFoundError: - pass - return (order, plugin_entrypoint) - -def register_hookspecs(hookspecs: List[str]): - """ - Register all the hookspecs from a list of module names. - """ - for hookspec_import_path in hookspecs: - hookspec_module = importlib.import_module(hookspec_import_path) - pm.add_hookspecs(hookspec_module) - - -def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]: - """ - Find all the plugins in a given directory. Just looks for an __init__.py file. - """ - return { - f"{prefix}.{plugin_entrypoint.parent.name}": plugin_entrypoint.parent - for plugin_entrypoint in sorted(plugins_dir.glob("*/__init__.py"), key=get_plugin_order) - if plugin_entrypoint.parent.name != 'abx' - } # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip" - - -def get_pip_installed_plugins(group='abx'): - """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip""" - import importlib.metadata - - DETECTED_PLUGINS = {} # module_name: module_dir_path - for dist in list(importlib.metadata.distributions()): - for entrypoint in dist.entry_points: - if entrypoint.group != group or pm.is_blocked(entrypoint.name): - continue - DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent - # pm.register(plugin, name=ep.name) - # pm._plugin_distinfo.append((plugin, DistFacade(dist))) - return DETECTED_PLUGINS - - -def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]): - """ - Get the mapping of dir_name: {plugin_id: plugin_dir} for all plugins in the given directories. 
- """ - DETECTED_PLUGINS = {} - for plugin_prefix, plugin_dir in plugin_dirs.items(): - DETECTED_PLUGINS.update(find_plugins_in_dir(plugin_dir, prefix=plugin_prefix)) - return DETECTED_PLUGINS - - -# Load all plugins from pip packages, archivebox built-ins, and user plugins - -def load_plugins(plugins_dict: Dict[str, Path]): - """ - Load all the plugins from a dictionary of module names and directory paths. - """ - LOADED_PLUGINS = {} - for plugin_module, plugin_dir in plugins_dict.items(): - # print(f'Loading plugin: {plugin_module} from {plugin_dir}') - plugin_module_loaded = importlib.import_module(plugin_module) - pm.register(plugin_module_loaded) - LOADED_PLUGINS[plugin_module] = plugin_module_loaded.PLUGIN - # print(f' √ Loaded plugin: {plugin_module}') - return LOADED_PLUGINS - -def get_registered_plugins(): - """ - Get all the plugins registered with Pluggy. - """ - plugins = {} - plugin_to_distinfo = dict(pm.list_plugin_distinfo()) - for plugin in pm.get_plugins(): - plugin_info = { - "name": plugin.__name__, - "hooks": [h.name for h in pm.get_hookcallers(plugin) or ()], - } - distinfo = plugin_to_distinfo.get(plugin) - if distinfo: - plugin_info["version"] = distinfo.version - plugin_info["name"] = ( - getattr(distinfo, "name", None) or distinfo.project_name - ) - plugins[plugin_info["name"]] = plugin_info - return plugins - - - - -def get_plugin_hooks(plugin_pkg: str | None) -> Dict[str, Callable]: - """ - Get all the functions marked with @hookimpl on a module. - """ - if not plugin_pkg: - return {} - - hooks = {} - - plugin_module = importlib.import_module(plugin_pkg) - for attr_name in dir(plugin_module): - if attr_name.startswith('_'): - continue - try: - attr = getattr(plugin_module, attr_name) - if isinstance(attr, Callable): - hooks[attr_name] = None - pm.parse_hookimpl_opts(plugin_module, attr_name) - hooks[attr_name] = attr - except Exception as e: - print(f'Error getting hookimpls for {plugin_pkg}: {e}') - - return hooks diff --git a/archivebox/abx/archivebox/__init__.py b/archivebox/abx/archivebox/__init__.py deleted file mode 100644 index 58bbb447..00000000 --- a/archivebox/abx/archivebox/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -__package__ = 'abx.archivebox' - -import os -import importlib - -from typing import Dict -from pathlib import Path - - -def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]): - """Load archivebox plugins, very similar to abx.load_plugins but it looks for a pydantic PLUGIN model + hooks in apps.py""" - LOADED_PLUGINS = {} - for plugin_module, plugin_dir in reversed(plugins_dict.items()): - # print(f'Loading plugin: {plugin_module} from {plugin_dir}') - - # 1. register the plugin module directly in case it contains any look hookimpls (e.g. in __init__.py) - try: - plugin_module_loaded = importlib.import_module(plugin_module) - pm.register(plugin_module_loaded) - except Exception as e: - print(f'Error registering plugin: {plugin_module} - {e}') - - - # 2. 
then try to import plugin_module.apps as well - if os.access(plugin_dir / 'apps.py', os.R_OK): - plugin_apps = importlib.import_module(plugin_module + '.apps') - pm.register(plugin_apps) # register the whole .apps in case it contains loose hookimpls (not in a class) - - # print(f' √ Loaded plugin: {plugin_module} {len(archivebox_plugins_found) * "🧩"}') - return LOADED_PLUGINS diff --git a/archivebox/abx/archivebox/base_binary.py b/archivebox/abx/archivebox/base_binary.py deleted file mode 100644 index 7890c05b..00000000 --- a/archivebox/abx/archivebox/base_binary.py +++ /dev/null @@ -1,106 +0,0 @@ -__package__ = "abx.archivebox" - -import os -from typing import Optional, cast -from typing_extensions import Self - -from pydantic import validate_call -from pydantic_pkgr import ( - Binary, - BinProvider, - BinProviderName, - AptProvider, - BrewProvider, - EnvProvider, -) - -from archivebox.config.permissions import ARCHIVEBOX_USER - - -class BaseBinProvider(BinProvider): - - # TODO: add install/load/load_or_install methods as abx.hookimpl methods - - @property - def admin_url(self) -> str: - # e.g. /admin/environment/binproviders/NpmBinProvider/ TODO - return "/admin/environment/binaries/" - -class BaseBinary(Binary): - - @staticmethod - def symlink_to_lib(binary, bin_dir=None) -> None: - from archivebox.config.common import STORAGE_CONFIG - bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin' - - if not (binary.abspath and os.access(binary.abspath, os.R_OK)): - return - - try: - bin_dir.mkdir(parents=True, exist_ok=True) - symlink = bin_dir / binary.name - symlink.unlink(missing_ok=True) - symlink.symlink_to(binary.abspath) - symlink.chmod(0o777) # make sure its executable by everyone - except Exception as err: - # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') - # not actually needed, we can just run without it - pass - - @validate_call - def load(self, fresh=False, **kwargs) -> Self: - from archivebox.config.common import STORAGE_CONFIG - if fresh: - binary = super().load(**kwargs) - self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin') - else: - # get cached binary from db - try: - from machine.models import InstalledBinary - installed_binary = InstalledBinary.objects.get_from_db_or_cache(self) # type: ignore - binary = InstalledBinary.load_from_db(installed_binary) - except Exception: - # maybe we are not in a DATA dir so there is no db, fallback to reading from fs - # (e.g. when archivebox version is run outside of a DATA dir) - binary = super().load(**kwargs) - return cast(Self, binary) - - @validate_call - def install(self, **kwargs) -> Self: - from archivebox.config.common import STORAGE_CONFIG - binary = super().install(**kwargs) - self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin') - return binary - - @validate_call - def load_or_install(self, fresh=False, **kwargs) -> Self: - from archivebox.config.common import STORAGE_CONFIG - try: - binary = self.load(fresh=fresh) - if binary and binary.version: - self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin') - return binary - except Exception: - pass - return self.install(**kwargs) - - @property - def admin_url(self) -> str: - # e.g. 
/admin/environment/config/LdapConfig/ - return f"/admin/environment/binaries/{self.name}/" - - -class AptBinProvider(AptProvider, BaseBinProvider): - name: BinProviderName = "apt" - -class BrewBinProvider(BrewProvider, BaseBinProvider): - name: BinProviderName = "brew" - -class EnvBinProvider(EnvProvider, BaseBinProvider): - name: BinProviderName = "env" - - euid: Optional[int] = ARCHIVEBOX_USER - -apt = AptBinProvider() -brew = BrewBinProvider() -env = EnvBinProvider() diff --git a/archivebox/abx/archivebox/base_extractor.py b/archivebox/abx/archivebox/base_extractor.py deleted file mode 100644 index f78921e0..00000000 --- a/archivebox/abx/archivebox/base_extractor.py +++ /dev/null @@ -1,219 +0,0 @@ -__package__ = 'abx.archivebox' - -import json -import os - -from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple -from typing_extensions import Self -from pathlib import Path - -from pydantic import model_validator, AfterValidator -from pydantic_pkgr import BinName -from django.utils.functional import cached_property -from django.utils import timezone - -import abx - -from .base_binary import BaseBinary - - -def no_empty_args(args: List[str]) -> List[str]: - assert all(len(arg) for arg in args) - return args - -ExtractorName = Literal['wget', 'warc', 'media', 'singlefile'] | str - -HandlerFuncStr = Annotated[str, AfterValidator(lambda s: s.startswith('self.'))] -CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(no_empty_args)] - - -class BaseExtractor: - - name: ExtractorName - binary: BinName - - output_path_func: HandlerFuncStr = 'self.get_output_path' - should_extract_func: HandlerFuncStr = 'self.should_extract' - extract_func: HandlerFuncStr = 'self.extract' - exec_func: HandlerFuncStr = 'self.exec' - - default_args: CmdArgsList = [] - extra_args: CmdArgsList = [] - args: Optional[CmdArgsList] = None - - @model_validator(mode='after') - def validate_model(self) -> Self: - if self.args is None: - self.args = [*self.default_args, *self.extra_args] - return self - - - def get_output_path(self, snapshot) -> Path: - return Path(self.__class__.__name__.lower()) - - def should_extract(self, uri: str, config: dict | None=None) -> bool: - try: - assert self.detect_installed_binary().version - except Exception: - raise - # could not load binary - return False - - # output_dir = self.get_output_path(snapshot) - # if output_dir.glob('*.*'): - # return False - return True - - @abx.hookimpl - def extract(self, snapshot_id: str) -> Dict[str, Any]: - from core.models import Snapshot - from archivebox import CONSTANTS - - snapshot = Snapshot.objects.get(id=snapshot_id) - - if not self.should_extract(snapshot): - return {} - - status = 'failed' - start_ts = timezone.now() - uplink = self.detect_network_interface() - installed_binary = self.detect_installed_binary() - machine = installed_binary.machine - assert uplink.machine == installed_binary.machine # it would be *very* weird if this wasn't true - - output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid) - output_dir.mkdir(parents=True, exist_ok=True) - - # execute the extractor binary with the given args - args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args] - cmd = [str(installed_binary.abspath), *args] - proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir) - - # collect the output - end_ts = timezone.now() - output_files = list(str(path.relative_to(output_dir)) for path in 
output_dir.glob('**/*.*')) - stdout = proc.stdout.strip() - stderr = proc.stderr.strip() - output_json = None - output_text = stdout - try: - output_json = json.loads(stdout.strip()) - output_text = None - except json.JSONDecodeError: - pass - - errors = [] - if proc.returncode == 0: - status = 'success' - else: - errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}') - - # increment health stats counters - if status == 'success': - machine.record_health_success() - uplink.record_health_success() - installed_binary.record_health_success() - else: - machine.record_health_failure() - uplink.record_health_failure() - installed_binary.record_health_failure() - - return { - 'extractor': self.name, - - 'snapshot': { - 'id': snapshot.id, - 'abid': snapshot.abid, - 'url': snapshot.url, - 'created_by_id': snapshot.created_by_id, - }, - - 'machine': { - 'id': machine.id, - 'abid': machine.abid, - 'guid': machine.guid, - 'hostname': machine.hostname, - 'hw_in_docker': machine.hw_in_docker, - 'hw_in_vm': machine.hw_in_vm, - 'hw_manufacturer': machine.hw_manufacturer, - 'hw_product': machine.hw_product, - 'hw_uuid': machine.hw_uuid, - 'os_arch': machine.os_arch, - 'os_family': machine.os_family, - 'os_platform': machine.os_platform, - 'os_release': machine.os_release, - 'os_kernel': machine.os_kernel, - }, - - 'uplink': { - 'id': uplink.id, - 'abid': uplink.abid, - 'mac_address': uplink.mac_address, - 'ip_public': uplink.ip_public, - 'ip_local': uplink.ip_local, - 'dns_server': uplink.dns_server, - 'hostname': uplink.hostname, - 'iface': uplink.iface, - 'isp': uplink.isp, - 'city': uplink.city, - 'region': uplink.region, - 'country': uplink.country, - }, - - 'binary': { - 'id': installed_binary.id, - 'abid': installed_binary.abid, - 'name': installed_binary.name, - 'binprovider': installed_binary.binprovider, - 'abspath': installed_binary.abspath, - 'version': installed_binary.version, - 'sha256': installed_binary.sha256, - }, - - 'cmd': cmd, - 'stdout': stdout, - 'stderr': stderr, - 'returncode': proc.returncode, - 'start_ts': start_ts, - 'end_ts': end_ts, - - 'status': status, - 'errors': errors, - 'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)), - 'output_files': output_files, - 'output_json': output_json or {}, - 'output_text': output_text or '', - } - - # TODO: move this to a hookimpl - def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None): - cwd = cwd or Path(os.getcwd()) - binary = self.load_binary(installed_binary=installed_binary) - - return binary.exec(cmd=args, cwd=cwd) - - @cached_property - def BINARY(self) -> BaseBinary: - import abx.archivebox.reads - for binary in abx.archivebox.reads.get_BINARIES().values(): - if binary.name == self.binary: - return binary - raise ValueError(f'Binary {self.binary} not found') - - def detect_installed_binary(self): - from machine.models import InstalledBinary - # hydrates binary from DB/cache if record of installed version is recent enough - # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host - return InstalledBinary.objects.get_from_db_or_cache(self.BINARY) - - def load_binary(self, installed_binary=None) -> BaseBinary: - installed_binary = installed_binary or self.detect_installed_binary() - return installed_binary.load_from_db() - - def detect_network_interface(self): - from machine.models import NetworkInterface - return NetworkInterface.objects.current() - - @abx.hookimpl - def get_EXTRACTORS(self): - return [self] diff --git 
a/archivebox/abx/archivebox/base_replayer.py b/archivebox/abx/archivebox/base_replayer.py deleted file mode 100644 index 097a9e94..00000000 --- a/archivebox/abx/archivebox/base_replayer.py +++ /dev/null @@ -1,25 +0,0 @@ -__package__ = 'abx.archivebox' - -import abx - - -class BaseReplayer: - """Describes how to render an ArchiveResult in several contexts""" - - url_pattern: str = '*' - - row_template: str = 'plugins/generic_replayer/templates/row.html' - embed_template: str = 'plugins/generic_replayer/templates/embed.html' - fullpage_template: str = 'plugins/generic_replayer/templates/fullpage.html' - - # row_view: LazyImportStr = 'plugins.generic_replayer.views.row_view' - # embed_view: LazyImportStr = 'plugins.generic_replayer.views.embed_view' - # fullpage_view: LazyImportStr = 'plugins.generic_replayer.views.fullpage_view' - # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon' - # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon' - - @abx.hookimpl - def get_REPLAYERS(self): - return [self] - - # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc... diff --git a/archivebox/abx/archivebox/base_searchbackend.py b/archivebox/abx/archivebox/base_searchbackend.py deleted file mode 100644 index 72713ab8..00000000 --- a/archivebox/abx/archivebox/base_searchbackend.py +++ /dev/null @@ -1,25 +0,0 @@ -__package__ = 'abx.archivebox' - -from typing import Iterable, List -import abc - - - -class BaseSearchBackend(abc.ABC): - name: str - - @staticmethod - @abc.abstractmethod - def index(snapshot_id: str, texts: List[str]): - return - - @staticmethod - @abc.abstractmethod - def flush(snapshot_ids: Iterable[str]): - return - - @staticmethod - @abc.abstractmethod - def search(text: str) -> List[str]: - raise NotImplementedError("search method must be implemented by subclass") - diff --git a/archivebox/abx/archivebox/hookspec.py b/archivebox/abx/archivebox/hookspec.py deleted file mode 100644 index bfcb93b8..00000000 --- a/archivebox/abx/archivebox/hookspec.py +++ /dev/null @@ -1,52 +0,0 @@ -__package__ = 'abx.archivebox' - -from typing import Dict, Any - -from .. import hookspec - -from .base_binary import BaseBinary, BaseBinProvider -from .base_configset import BaseConfigSet -from .base_extractor import BaseExtractor -from .base_searchbackend import BaseSearchBackend - - -@hookspec -def get_PLUGIN() -> Dict[str, Dict[str, Any]]: - return {} - -@hookspec -def get_CONFIG() -> Dict[str, BaseConfigSet]: - return {} - - - -@hookspec -def get_EXTRACTORS() -> Dict[str, BaseExtractor]: - return {} - -@hookspec -def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]: - return {} - -# @hookspec -# def get_REPLAYERS() -> Dict[str, BaseReplayer]: -# return {} - -# @hookspec -# def get_ADMINDATAVIEWS(): -# return {} - -# @hookspec -# def get_QUEUES(): -# return {} - - -############################################################## -# provided by abx.pydantic_pkgr.hookspec: -# @hookspec -# def get_BINARIES() -> Dict[str, BaseBinary]: -# return {} - -# @hookspec -# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]: -# return {} diff --git a/archivebox/abx/archivebox/reads.py b/archivebox/abx/archivebox/reads.py deleted file mode 100644 index d2877ac5..00000000 --- a/archivebox/abx/archivebox/reads.py +++ /dev/null @@ -1,160 +0,0 @@ -__package__ = 'abx.archivebox' - -import importlib -from typing import Dict, Set, Any, TYPE_CHECKING - -from benedict import benedict - -import abx -from .. 
import pm - -if TYPE_CHECKING: - from .base_configset import BaseConfigSet - from .base_binary import BaseBinary, BaseBinProvider - from .base_extractor import BaseExtractor - from .base_searchbackend import BaseSearchBackend - # from .base_replayer import BaseReplayer - # from .base_queue import BaseQueue - # from .base_admindataview import BaseAdminDataView - -# API exposed to ArchiveBox code - -def get_PLUGINS() -> Dict[str, Dict[str, Any]]: - return benedict({ - plugin_id: plugin - for plugin_dict in pm.hook.get_PLUGIN() - for plugin_id, plugin in plugin_dict.items() - }) - -def get_PLUGIN(plugin_id: str) -> Dict[str, Any]: - plugin_info = get_PLUGINS().get(plugin_id, {}) - package = plugin_info.get('package', plugin_info.get('PACKAGE', None)) - if not package: - return {'id': plugin_id, 'hooks': {}} - module = importlib.import_module(package) - hooks = abx.get_plugin_hooks(module.__package__) - assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks) - - return benedict({ - 'id': plugin_id, - 'label': getattr(module, '__label__', plugin_id), - 'module': module, - 'package': module.__package__, - 'hooks': hooks, - 'version': getattr(module, '__version__', '999.999.999'), - 'author': getattr(module, '__author__', 'Unknown'), - 'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'), - 'dependencies': getattr(module, '__dependencies__', []), - 'source_code': module.__file__, - **plugin_info, - }) - - -def get_HOOKS() -> Set[str]: - return { - hook_name - for plugin_id in get_PLUGINS().keys() - for hook_name in get_PLUGIN(plugin_id).hooks - } - -def get_CONFIGS() -> Dict[str, 'BaseConfigSet']: - return benedict({ - config_id: configset - for plugin_configs in pm.hook.get_CONFIG() - for config_id, configset in plugin_configs.items() - }) - - -def get_FLAT_CONFIG() -> Dict[str, Any]: - return benedict({ - key: value - for configset in get_CONFIGS().values() - for key, value in configset.model_dump().items() - }) - -def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']: - # TODO: move these to plugins - from abx.archivebox.base_binary import apt, brew, env - builtin_binproviders = { - 'env': env, - 'apt': apt, - 'brew': brew, - } - - return benedict({ - binprovider_id: binprovider - for plugin_binproviders in [builtin_binproviders, *pm.hook.get_BINPROVIDERS()] - for binprovider_id, binprovider in plugin_binproviders.items() - }) - -def get_BINARIES() -> Dict[str, 'BaseBinary']: - return benedict({ - binary_id: binary - for plugin_binaries in pm.hook.get_BINARIES() - for binary_id, binary in plugin_binaries.items() - }) - -def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']: - return benedict({ - extractor_id: extractor - for plugin_extractors in pm.hook.get_EXTRACTORS() - for extractor_id, extractor in plugin_extractors.items() - }) - -# def get_REPLAYERS() -> Dict[str, 'BaseReplayer']: -# return benedict({ -# replayer.id: replayer -# for plugin_replayers in pm.hook.get_REPLAYERS() -# for replayer in plugin_replayers -# }) - -# def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']: -# return benedict({ -# admin_dataview.id: admin_dataview -# for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS() -# for admin_dataview in plugin_admin_dataviews -# }) - -# def get_QUEUES() -> Dict[str, 'BaseQueue']: -# return benedict({ -# queue.id: queue -# for plugin_queues in pm.hook.get_QUEUES() -# for queue in plugin_queues -# }) - -def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']: - return benedict({ - searchbackend_id: 
searchbackend - for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS() - for searchbackend_id,searchbackend in plugin_searchbackends.items() - }) - - - -def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None): - """Get all the relevant config for the given scope, in correct precedence order""" - - from django.conf import settings - default_config: benedict = defaults or settings.CONFIG - - snapshot = snapshot or (archiveresult and archiveresult.snapshot) - crawl = crawl or (snapshot and snapshot.crawl) - seed = seed or (crawl and crawl.seed) - persona = persona or (crawl and crawl.persona) - - persona_config = persona.config if persona else {} - seed_config = seed.config if seed else {} - crawl_config = crawl.config if crawl else {} - snapshot_config = snapshot.config if snapshot else {} - archiveresult_config = archiveresult.config if archiveresult else {} - extra_config = extra_config or {} - - return { - **default_config, # defaults / config file / environment variables - **persona_config, # lowest precedence - **seed_config, - **crawl_config, - **snapshot_config, - **archiveresult_config, - **extra_config, # highest precedence - } diff --git a/archivebox/abx/django/__init__.py b/archivebox/abx/django/__init__.py deleted file mode 100644 index 56fe8ddd..00000000 --- a/archivebox/abx/django/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__package__ = 'abx.django' diff --git a/archivebox/abx/django/apps.py b/archivebox/abx/django/apps.py deleted file mode 100644 index 085647c1..00000000 --- a/archivebox/abx/django/apps.py +++ /dev/null @@ -1,13 +0,0 @@ -__package__ = 'abx.django' - -from django.apps import AppConfig - - -class ABXConfig(AppConfig): - name = 'abx' - - def ready(self): - import abx - from django.conf import settings - - abx.pm.hook.ready(settings=settings) diff --git a/archivebox/abx/django/hookspec.py b/archivebox/abx/django/hookspec.py deleted file mode 100644 index 87f8e520..00000000 --- a/archivebox/abx/django/hookspec.py +++ /dev/null @@ -1,125 +0,0 @@ -__package__ = 'abx.django' - -from ..hookspec import hookspec - - -########################################################################################### - -@hookspec -def get_INSTALLED_APPS(): - """Return a list of apps to add to INSTALLED_APPS""" - # e.g. ['your_plugin_type.plugin_name'] - return [] - -# @hookspec -# def register_INSTALLED_APPS(INSTALLED_APPS): -# """Mutate INSTALLED_APPS in place to add your app in a specific position""" -# # idx_of_contrib = INSTALLED_APPS.index('django.contrib.auth') -# # INSTALLED_APPS.insert(idx_of_contrib + 1, 'your_plugin_type.plugin_name') -# pass - - -@hookspec -def get_TEMPLATE_DIRS(): - return [] # e.g. ['your_plugin_type/plugin_name/templates'] - -# @hookspec -# def register_TEMPLATE_DIRS(TEMPLATE_DIRS): -# """Install django settings""" -# # e.g. TEMPLATE_DIRS.insert(0, 'your_plugin_type/plugin_name/templates') -# pass - - -@hookspec -def get_STATICFILES_DIRS(): - return [] # e.g. ['your_plugin_type/plugin_name/static'] - -# @hookspec -# def register_STATICFILES_DIRS(STATICFILES_DIRS): -# """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position""" -# # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static') -# pass - - -@hookspec -def get_MIDDLEWARE(): - return [] # e.g. 
['your_plugin_type.plugin_name.middleware.YourMiddleware'] - -# @hookspec -# def register_MIDDLEWARE(MIDDLEWARE): -# """Mutate MIDDLEWARE in place to add your middleware in a specific position""" -# # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware') -# pass - - -@hookspec -def get_AUTHENTICATION_BACKENDS(): - return [] # e.g. ['django_auth_ldap.backend.LDAPBackend'] - -# @hookspec -# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): -# """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position""" -# # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend') -# pass - -@hookspec -def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME): - return [] # e.g. [{'name': 'your_plugin_type.plugin_name', 'HUEY': {...}}] - -# @hookspec -# def register_DJANGO_HUEY(DJANGO_HUEY): -# """Mutate DJANGO_HUEY in place to add your huey queues in a specific position""" -# # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value' -# pass - - -@hookspec -def get_ADMIN_DATA_VIEWS_URLS(): - return [] - -# @hookspec -# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): -# """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position""" -# # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py') -# pass - - -# @hookspec -# def register_settings(settings): -# """Mutate settings in place to add your settings / modify existing settings""" -# # settings.SOME_KEY = 'some_value' -# pass - - -########################################################################################### - -@hookspec -def get_urlpatterns(): - return [] # e.g. [path('your_plugin_type/plugin_name/url.py', your_view)] - -# @hookspec -# def register_urlpatterns(urlpatterns): -# """Mutate urlpatterns in place to add your urlpatterns in a specific position""" -# # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view)) -# pass - -########################################################################################### - -@hookspec -def register_checks(): - """Register django checks with django system checks system""" - pass - -@hookspec -def register_admin(admin_site): - """Register django admin views/models with the main django admin site instance""" - pass - - -########################################################################################### - - -@hookspec -def ready(): - """Called when Django apps app.ready() are triggered""" - pass diff --git a/archivebox/abx/django/use.py b/archivebox/abx/django/use.py deleted file mode 100644 index a52ada3b..00000000 --- a/archivebox/abx/django/use.py +++ /dev/null @@ -1,101 +0,0 @@ -__package__ = 'abx.django' - -import itertools -# from benedict import benedict - -from .. 
import pm - - -def get_INSTALLED_APPS(): - return itertools.chain(*reversed(pm.hook.get_INSTALLED_APPS())) - -# def register_INSTALLLED_APPS(INSTALLED_APPS): -# pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS) - - -def get_MIDDLEWARES(): - return itertools.chain(*reversed(pm.hook.get_MIDDLEWARE())) - -# def register_MIDDLEWARES(MIDDLEWARE): -# pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE) - - -def get_AUTHENTICATION_BACKENDS(): - return itertools.chain(*reversed(pm.hook.get_AUTHENTICATION_BACKENDS())) - -# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): -# pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS) - - -def get_STATICFILES_DIRS(): - return itertools.chain(*reversed(pm.hook.get_STATICFILES_DIRS())) - -# def register_STATICFILES_DIRS(STATICFILES_DIRS): -# pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS) - - -def get_TEMPLATE_DIRS(): - return itertools.chain(*reversed(pm.hook.get_TEMPLATE_DIRS())) - -# def register_TEMPLATE_DIRS(TEMPLATE_DIRS): -# pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS) - -def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'): - HUEY_QUEUES = {} - for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME): - HUEY_QUEUES.update(plugin_result) - return HUEY_QUEUES - -# def register_DJANGO_HUEY(DJANGO_HUEY): -# pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY) - -def get_ADMIN_DATA_VIEWS_URLS(): - return itertools.chain(*reversed(pm.hook.get_ADMIN_DATA_VIEWS_URLS())) - -# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): -# pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS) - - -# def register_settings(settings): -# # convert settings dict to an benedict so we can set values using settings.attr = xyz notation -# settings_as_obj = benedict(settings, keypath_separator=None) - -# # set default values for settings that are used by plugins -# # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', []) -# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', []) -# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', []) -# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', []) -# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', []) -# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}}) -# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []}) - -# # # call all the hook functions to mutate the settings values in-place -# # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS) -# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE) -# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS) -# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS) -# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS) -# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY) -# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS) - -# # calls Plugin.settings(settings) on each registered plugin -# pm.hook.register_settings(settings=settings_as_obj) - -# # then finally update the settings globals() object will all the new settings -# # settings.update(settings_as_obj) - - -def get_urlpatterns(): - return list(itertools.chain(*pm.hook.urlpatterns())) - -def register_urlpatterns(urlpatterns): - pm.hook.register_urlpatterns(urlpatterns=urlpatterns) - - -def register_checks(): - """register any django system 
checks""" - pm.hook.register_checks() - -def register_admin(admin_site): - """register any django admin models/views with the main django admin site instance""" - pm.hook.register_admin(admin_site=admin_site) diff --git a/archivebox/abx/hookspec.py b/archivebox/abx/hookspec.py deleted file mode 100644 index a25f7673..00000000 --- a/archivebox/abx/hookspec.py +++ /dev/null @@ -1,22 +0,0 @@ -from pathlib import Path - -from pluggy import HookimplMarker -from pluggy import HookspecMarker - -spec = hookspec = HookspecMarker("abx") -impl = hookimpl = HookimplMarker("abx") - - -@hookspec -@hookimpl -def get_system_user() -> str: - # Beware $HOME may not match current EUID, UID, PUID, SUID, there are edge cases - # - sudo (EUD != UID != SUID) - # - running with an autodetected UID based on data dir ownership - # but mapping of UID:username is broken because it was created - # by a different host system, e.g. 911's $HOME outside of docker - # might be /usr/lib/lxd instead of /home/archivebox - # - running as a user that doens't have a home directory - # - home directory is set to a path that doesn't exist, or is inside a dir we cant read - return Path('~').expanduser().name - diff --git a/archivebox/abx/manager.py b/archivebox/abx/manager.py deleted file mode 100644 index 8d44a087..00000000 --- a/archivebox/abx/manager.py +++ /dev/null @@ -1,30 +0,0 @@ -import inspect - -import pluggy - - -class PluginManager(pluggy.PluginManager): - """ - Patch to fix pluggy's PluginManager to work with pydantic models. - See: https://github.com/pytest-dev/pluggy/pull/536 - """ - def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None: - # IMPORTANT: @property methods can have side effects, and are never hookimpl - # if attr is a property, skip it in advance - plugin_class = plugin if inspect.isclass(plugin) else type(plugin) - if isinstance(getattr(plugin_class, name, None), property): - return None - - # pydantic model fields are like attrs and also can never be hookimpls - plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__") - if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}): - # pydantic models mess with the class and attr __signature__ - # so inspect.isroutine(...) 
throws exceptions and cant be used - return None - - try: - return super().parse_hookimpl_opts(plugin, name) - except AttributeError: - return super().parse_hookimpl_opts(type(plugin), name) - -pm = PluginManager("abx") diff --git a/archivebox/abx/pydantic_pkgr/__init__.py b/archivebox/abx/pydantic_pkgr/__init__.py deleted file mode 100644 index 28cd0f81..00000000 --- a/archivebox/abx/pydantic_pkgr/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__package__ = 'abx.pydantic_pkgr' diff --git a/archivebox/abx/pydantic_pkgr/hookspec.py b/archivebox/abx/pydantic_pkgr/hookspec.py deleted file mode 100644 index 6b293abb..00000000 --- a/archivebox/abx/pydantic_pkgr/hookspec.py +++ /dev/null @@ -1,13 +0,0 @@ - -from ..hookspec import hookspec - -########################################################################################### - -@hookspec -def get_BINPROVIDERS(): - return {} - -@hookspec -def get_BINARIES(): - return {} - diff --git a/archivebox/plugins_auth/__init__.py b/archivebox/actors/__init__.py similarity index 100% rename from archivebox/plugins_auth/__init__.py rename to archivebox/actors/__init__.py diff --git a/archivebox/actors/actor.py b/archivebox/actors/actor.py new file mode 100644 index 00000000..62369793 --- /dev/null +++ b/archivebox/actors/actor.py @@ -0,0 +1,313 @@ +__package__ = 'archivebox.actors' + +import os +import time +from abc import ABC, abstractmethod +from typing import ClassVar, Generic, TypeVar, Any, cast, Literal, Type +from django.utils.functional import classproperty + +from rich import print +import psutil + +from django import db +from django.db import models +from django.db.models import QuerySet +from multiprocessing import Process, cpu_count +from threading import Thread, get_native_id + +# from archivebox.logging_util import TimedProgress + +LaunchKwargs = dict[str, Any] + +ModelType = TypeVar('ModelType', bound=models.Model) + +class ActorType(ABC, Generic[ModelType]): + """ + Base class for all actors. 
Usage: + class FaviconActor(ActorType[ArchiveResult]): + QUERYSET: ClassVar[QuerySet] = ArchiveResult.objects.filter(status='queued', extractor='favicon') + CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"' + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' + ATOMIC: ClassVar[bool] = True + + def claim_sql_set(self, obj: ArchiveResult) -> str: + # SQL fields to update atomically while claiming an object from the queue + retry_at = datetime.now() + timedelta(seconds=self.MAX_TICK_TIME) + return f"status = 'started', locked_by = {self.pid}, retry_at = {retry_at}" + + def tick(self, obj: ArchiveResult) -> None: + run_favicon_extractor(obj) + ArchiveResult.objects.filter(pk=obj.pk, status='started').update(status='success') + """ + pid: int + idle_count: int = 0 + launch_kwargs: LaunchKwargs = {} + mode: Literal['thread', 'process'] = 'process' + + MAX_CONCURRENT_ACTORS: ClassVar[int] = min(max(2, int(cpu_count() * 0.6)), 8) # min 2, max 8, up to 60% of available cpu cores + MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object + + QUERYSET: ClassVar[QuerySet] # the QuerySet to claim objects from + CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue + CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue + CLAIM_FROM_TOP: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10 # the number of objects to consider when atomically getting the next object from the queue + ATOMIC: ClassVar[bool] = True # whether to atomically fetch+claim the nextobject in one step, or fetch and lock it in two steps + + # model_type: Type[ModelType] + + _SPAWNED_ACTOR_PIDS: ClassVar[list[psutil.Process]] = [] # record all the pids of Actors spawned by this class + + def __init__(self, mode: Literal['thread', 'process']|None=None, **launch_kwargs: LaunchKwargs): + self.mode = mode or self.mode + self.launch_kwargs = launch_kwargs or dict(self.launch_kwargs) + + @classproperty + def name(cls) -> str: + return cls.__name__ # type: ignore + + def __str__(self) -> str: + return self.__repr__() + + def __repr__(self) -> str: + """FaviconActor[pid=1234]""" + label = 'pid' if self.mode == 'process' else 'tid' + return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]' + + ### Class Methods: Called by Orchestrator on ActorType class before it has been spawned + + @classmethod + def get_running_actors(cls) -> list[int]: + """returns a list of pids of all running actors of this type""" + # WARNING: only works for process actors, not thread actors + if cls.mode == 'thread': + raise NotImplementedError('get_running_actors() is not implemented for thread actors') + return [ + proc.pid for proc in cls._SPAWNED_ACTOR_PIDS + if proc.is_running() and proc.status() != 'zombie' + ] + + @classmethod + def get_actors_to_spawn(cls, queue: QuerySet, running_actors: list[int]) -> list[LaunchKwargs]: + """Get a list of launch kwargs for the number of actors to spawn based on the queue and currently running actors""" + queue_length = queue.count() + if not queue_length: # queue is empty, spawn 0 actors + return [] + + actors_to_spawn: list[LaunchKwargs] = [] + max_spawnable = cls.MAX_CONCURRENT_ACTORS - len(running_actors) + + # spawning new actors is expensive, 
avoid spawning all the actors at once. To stagger them, + # let the next orchestrator tick handle starting another 2 on the next tick() + # if queue_length > 10: # queue is long, spawn as many as possible + # actors_to_spawn += max_spawnable * [{}] + + if queue_length > 4: # queue is medium, spawn 1 or 2 actors + actors_to_spawn += min(2, max_spawnable) * [{**cls.launch_kwargs}] + else: # queue is short, spawn 1 actor + actors_to_spawn += min(1, max_spawnable) * [{**cls.launch_kwargs}] + return actors_to_spawn + + @classmethod + def start(cls, mode: Literal['thread', 'process']='process', **launch_kwargs: LaunchKwargs) -> int: + if mode == 'thread': + return cls.fork_actor_as_thread(**launch_kwargs) + elif mode == 'process': + return cls.fork_actor_as_process(**launch_kwargs) + raise ValueError(f'Invalid actor mode: {mode} must be "thread" or "process"') + + @classmethod + def fork_actor_as_thread(cls, **launch_kwargs: LaunchKwargs) -> int: + """Spawn a new background thread running the actor's runloop""" + actor = cls(mode='thread', **launch_kwargs) + bg_actor_thread = Thread(target=actor.runloop) + bg_actor_thread.start() + assert bg_actor_thread.native_id is not None + return bg_actor_thread.native_id + + @classmethod + def fork_actor_as_process(cls, **launch_kwargs: LaunchKwargs) -> int: + """Spawn a new background process running the actor's runloop""" + actor = cls(mode='process', **launch_kwargs) + bg_actor_process = Process(target=actor.runloop) + bg_actor_process.start() + assert bg_actor_process.pid is not None + cls._SPAWNED_ACTOR_PIDS.append(psutil.Process(pid=bg_actor_process.pid)) + return bg_actor_process.pid + + @classmethod + def get_model(cls) -> Type[ModelType]: + # wish this was a @classproperty but Generic[ModelType] return type cant be statically inferred for @classproperty + return cls.QUERYSET.model + + @classmethod + def get_queue(cls) -> QuerySet: + """override this to provide your queryset as the queue""" + # return ArchiveResult.objects.filter(status='queued', extractor__in=('pdf', 'dom', 'screenshot')) + return cls.QUERYSET + + ### Instance Methods: Called by Actor after it has been spawned (i.e. 
forked as a thread or process)
+
+    def runloop(self):
+        """The main runloop that starts running when the actor is spawned (as subprocess or thread) and exits when the queue is empty"""
+        self.on_startup()
+        try:
+            while True:
+                obj_to_process: ModelType | None = None
+                try:
+                    obj_to_process = cast(ModelType, self.get_next(atomic=self.ATOMIC))
+                except Exception:
+                    pass
+
+                if obj_to_process:
+                    self.idle_count = 0   # reset idle count if we got an object
+                else:
+                    if self.idle_count >= 30:
+                        break             # stop looping and exit if queue is empty and we have idled for 30sec
+                    else:
+                        # print('Actor runloop()', f'pid={self.pid}', 'queue empty, rechecking...')
+                        self.idle_count += 1
+                        time.sleep(1)
+                        continue
+
+                self.on_tick_start(obj_to_process)
+
+                # Process the object
+                try:
+                    self.tick(obj_to_process)
+                except Exception as err:
+                    print(f'[red]🏃‍♂️ ERROR: {self}.tick()[/red]', err)
+                    db.connections.close_all()   # always reset the db connection after an exception to clear any pending transactions
+                    self.on_tick_exception(obj_to_process, err)
+                finally:
+                    self.on_tick_end(obj_to_process)
+
+            self.on_shutdown(err=None)
+        except BaseException as err:
+            if isinstance(err, KeyboardInterrupt):
+                print()
+            else:
+                print(f'\n[red]🏃‍♂️ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err)
+            self.on_shutdown(err=err)
+
+    def get_next(self, atomic: bool | None=None) -> ModelType | None:
+        """get the next object from the queue, atomically locking it if self.ATOMIC=True"""
+        if atomic is None:
+            atomic = self.ATOMIC
+
+        if atomic:
+            # fetch and claim the next object from the queue in one go, atomically
+            obj = self.get_next_atomic()
+        else:
+            # two-step claim: fetch the next object and lock it in a separate query
+            obj = self.get_queue().last()
+            assert obj and self.lock_next(obj), f'Unable to fetch+lock the next {self.get_model().__name__} object from {self}.QUERYSET'
+        return obj
+
+    def lock_next(self, obj: ModelType) -> bool:
+        """override this to implement a custom two-step (non-atomic) lock mechanism"""
+        # For example:
+        # assert obj._model.objects.filter(pk=obj.pk, status='queued').update(status='started', locked_by=self.pid)
+        # Not needed if using get_next_atomic() to claim the object atomically
+        # print(f'[blue]🏃‍♂️ {self}.lock()[/blue]', obj.abid or obj.id)
+        return True
+
+    def claim_sql_where(self) -> str:
+        """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """
+        return self.CLAIM_WHERE
+
+    def claim_sql_set(self) -> str:
+        """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """
+        return self.CLAIM_SET
+
+    def claim_sql_order(self) -> str:
+        """override this to implement a custom ORDER BY clause for the atomic claim step e.g. "created_at DESC" """
+        return self.CLAIM_ORDER
+
+    def claim_from_top(self) -> int:
+        """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue"""
+        return self.CLAIM_FROM_TOP
+
+    def get_next_atomic(self, shallow: bool=True) -> ModelType | None:
+        """
+        claim a random object from the top n=50 objects in the queue (atomically updates status=queued->started for the claimed object)
+        optimized for minimizing contention on the queue with other actors selecting from the same list
+        slightly faster than claim_any_obj() which selects randomly from the entire queue but needs to know the total count
+        """
+        Model = self.get_model()                                       # e.g. ArchiveResult
+        table = f'{Model._meta.app_label}_{Model._meta.model_name}'    # e.g. core_archiveresult
+
+        where_sql = self.claim_sql_where()
+        set_sql = self.claim_sql_set()
+        order_by_sql = self.claim_sql_order()
+        choose_from_top = self.claim_from_top()
+
+        with db.connection.cursor() as cursor:
+            # subquery gets the pool of the top 50 candidates sorted by sort and order
+            # main query selects a random one from that pool
+            cursor.execute(f"""
+                UPDATE {table}
+                SET {set_sql}
+                WHERE {where_sql} and id = (
+                    SELECT id FROM (
+                        SELECT id FROM {table}
+                        WHERE {where_sql}
+                        ORDER BY {order_by_sql}
+                        LIMIT {choose_from_top}
+                    ) candidates
+                    ORDER BY RANDOM()
+                    LIMIT 1
+                )
+                RETURNING id;
+            """)
+            result = cursor.fetchone()
+
+            if result is None:
+                return None   # If no rows were claimed, return None
+
+            if shallow:
+                # shallow: faster, returns potentially incomplete object instance missing some django auto-populated fields:
+                columns = [col[0] for col in cursor.description or ['id']]
+                return Model(**dict(zip(columns, result)))
+
+            # if not shallow, do one extra query to get a more complete object instance (load it fully from scratch)
+            return Model.objects.get(id=result[0])
+
+    @abstractmethod
+    def tick(self, obj: ModelType) -> None:
+        """override this to process the object"""
+        print(f'[blue]🏃‍♂️ {self}.tick()[/blue]', obj.abid or obj.id)
+        # For example:
+        # do_some_task(obj)
+        # do_something_else(obj)
+        # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success')
+        raise NotImplementedError('tick() must be implemented by the Actor subclass')
+
+    def on_startup(self) -> None:
+        if self.mode == 'thread':
+            self.pid = get_native_id()   # thread id
+            print(f'[green]🏃‍♂️ {self}.on_startup() STARTUP (THREAD)[/green]')
+        else:
+            self.pid = os.getpid()       # process id
+            print(f'[green]🏃‍♂️ {self}.on_startup() STARTUP (PROCESS)[/green]')
+        # abx.pm.hook.on_actor_startup(self)
+
+    def on_shutdown(self, err: BaseException | None=None) -> None:
+        print(f'[grey53]🏃‍♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]')
+        # abx.pm.hook.on_actor_shutdown(self)
+
+    def on_tick_start(self, obj: ModelType) -> None:
+        # print(f'🏃‍♂️ {self}.on_tick_start()', obj.abid or obj.id)
+        # abx.pm.hook.on_actor_tick_start(self, obj_to_process)
+        # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix='      ')
+        pass
+
+    def on_tick_end(self, obj: ModelType) -> None:
+        # print(f'🏃‍♂️ {self}.on_tick_end()', obj.abid or obj.id)
+        # abx.pm.hook.on_actor_tick_end(self, obj_to_process)
+        # self.timer.end()
+        pass
+
+    def on_tick_exception(self, obj: ModelType, err: BaseException) -> None:
+        print(f'[red]🏃‍♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err)
+        # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err)
diff --git a/archivebox/actors/admin.py b/archivebox/actors/admin.py
new file mode 100644
index 00000000..8c38f3f3
--- /dev/null
+++ b/archivebox/actors/admin.py
@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.
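For context on how the ActorType contract above is meant to be used, here is a minimal hypothetical subclass (not part of this diff; the `TitleActor` name, the `'sealed'` status value, and the Snapshot fields are illustrative assumptions based on the docstring example and the models introduced later in this changeset):

```python
# Hypothetical sketch of an ActorType subclass (illustrative only, not in this diff).
# Assumes a Snapshot model with `status` and `created_at` fields, as used elsewhere here.
from typing import ClassVar

from django.db.models import QuerySet

from archivebox.actors.actor import ActorType
from core.models import Snapshot


class TitleActor(ActorType[Snapshot]):
    # claim queued Snapshots, oldest first, atomically flipping status 'queued' -> 'started'
    QUERYSET: ClassVar[QuerySet] = Snapshot.objects.filter(status='queued')
    CLAIM_WHERE: ClassVar[str] = 'status = "queued"'
    CLAIM_SET: ClassVar[str] = 'status = "started"'
    CLAIM_ORDER: ClassVar[str] = 'created_at ASC'

    def tick(self, obj: Snapshot) -> None:
        # do the real work for the claimed object here, then advance its status;
        # filtering on status='started' guards against a concurrent writer
        Snapshot.objects.filter(pk=obj.pk, status='started').update(status='sealed')


# spawn one background worker; its runloop exits on its own after ~30s of empty queue
# pid = TitleActor.start(mode='process')
```

Note that `get_next_atomic()` claims a random row out of the top `CLAIM_FROM_TOP` candidates in a single UPDATE, so several workers like this can drain the same queue with minimal contention and no explicit locking.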
diff --git a/archivebox/actors/apps.py b/archivebox/actors/apps.py new file mode 100644 index 00000000..2347ac3f --- /dev/null +++ b/archivebox/actors/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class ActorsConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "actors" diff --git a/archivebox/plugins_extractor/__init__.py b/archivebox/actors/migrations/__init__.py similarity index 100% rename from archivebox/plugins_extractor/__init__.py rename to archivebox/actors/migrations/__init__.py diff --git a/archivebox/actors/models.py b/archivebox/actors/models.py new file mode 100644 index 00000000..71a83623 --- /dev/null +++ b/archivebox/actors/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. diff --git a/archivebox/actors/orchestrator.py b/archivebox/actors/orchestrator.py new file mode 100644 index 00000000..df4c860b --- /dev/null +++ b/archivebox/actors/orchestrator.py @@ -0,0 +1,244 @@ +__package__ = 'archivebox.actors' + +import os +import time +import itertools +from typing import Dict, Type, Literal, ClassVar +from django.utils.functional import classproperty + +from multiprocessing import Process, cpu_count +from threading import Thread, get_native_id + + +from rich import print + +from django.db.models import QuerySet + +from django.apps import apps +from .actor import ActorType + +class Orchestrator: + pid: int + idle_count: int = 0 + actor_types: Dict[str, Type[ActorType]] + mode: Literal['thread', 'process'] = 'process' + + def __init__(self, actor_types: Dict[str, Type[ActorType]] | None = None, mode: Literal['thread', 'process'] | None=None): + self.actor_types = actor_types or self.actor_types or self.autodiscover_actor_types() + self.mode = mode or self.mode + + def __repr__(self) -> str: + label = 'tid' if self.mode == 'thread' else 'pid' + return f'[underline]{self.name}[/underline]\\[{label}={self.pid}]' + + def __str__(self) -> str: + return self.__repr__() + + @classproperty + def name(cls) -> str: + return cls.__name__ # type: ignore + + def fork_as_thread(self): + self.thread = Thread(target=self.runloop) + self.thread.start() + assert self.thread.native_id is not None + return self.thread.native_id + + def fork_as_process(self): + self.process = Process(target=self.runloop) + self.process.start() + assert self.process.pid is not None + return self.process.pid + + def start(self) -> int: + if self.mode == 'thread': + return self.fork_as_thread() + elif self.mode == 'process': + return self.fork_as_process() + raise ValueError(f'Invalid orchestrator mode: {self.mode}') + + @classmethod + def autodiscover_actor_types(cls) -> Dict[str, Type[ActorType]]: + # returns a Dict of all discovered {actor_type_id: ActorType} across the codebase + # override this method in a subclass to customize the actor types that are used + # return {'Snapshot': SnapshotActorType, 'ArchiveResult_chrome': ChromeActorType, ...} + return { + # look through all models and find all classes that inherit from ActorType + # actor_type.__name__: actor_type + # for actor_type in abx.pm.hook.get_all_ACTORS_TYPES().values() + } + + @classmethod + def get_orphaned_objects(cls, all_queues) -> list: + # returns a list of objects that are in the queues of all actor types but not in the queues of any other actor types + all_queued_ids = itertools.chain(*[queue.values('id', flat=True) for queue in all_queues.values()]) + orphaned_objects = [] + for model in apps.get_models(): + if hasattr(model, 'retry_at'): + 
orphaned_objects.extend(model.objects.filter(retry_at__lt=timezone.now()).exclude(id__in=all_queued_ids)) + return orphaned_objects + + def on_startup(self): + if self.mode == 'thread': + self.pid = get_native_id() + print(f'[green]👨‍✈️ {self}.on_startup() STARTUP (THREAD)[/green]') + elif self.mode == 'process': + self.pid = os.getpid() + print(f'[green]👨‍✈️ {self}.on_startup() STARTUP (PROCESS)[/green]') + # abx.pm.hook.on_orchestrator_startup(self) + + def on_shutdown(self, err: BaseException | None = None): + print(f'[grey53]👨‍✈️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') + # abx.pm.hook.on_orchestrator_shutdown(self) + + def on_tick_started(self, all_queues): + # total_pending = sum(queue.count() for queue in all_queues.values()) + # print(f'👨‍✈️ {self}.on_tick_started()', f'total_pending={total_pending}') + # abx.pm.hook.on_orchestrator_tick_started(self, actor_types, all_queues) + pass + + def on_tick_finished(self, all_queues, all_existing_actors, all_spawned_actors): + if all_spawned_actors: + total_queue_length = sum(queue.count() for queue in all_queues.values()) + print(f'[grey53]👨‍✈️ {self}.on_tick_finished() queue={total_queue_length} existing_actors={len(all_existing_actors)} spawned_actors={len(all_spawned_actors)}[/grey53]') + # abx.pm.hook.on_orchestrator_tick_finished(self, actor_types, all_queues) + + def on_idle(self, all_queues): + # print(f'👨‍✈️ {self}.on_idle()') + # abx.pm.hook.on_orchestrator_idle(self) + # check for orphaned objects left behind + if self.idle_count == 60: + orphaned_objects = self.get_orphaned_objects(all_queues) + if orphaned_objects: + print('[red]👨‍✈️ WARNING: some objects may not be processed, no actor has claimed them after 60s:[/red]', orphaned_objects) + + def runloop(self): + self.on_startup() + try: + while True: + all_queues = { + actor_type: actor_type.get_queue() + for actor_type in self.actor_types.values() + } + if not all_queues: + raise Exception('Failed to find any actor_types to process') + + self.on_tick_started(all_queues) + + all_existing_actors = [] + all_spawned_actors = [] + + for actor_type, queue in all_queues.items(): + try: + existing_actors = actor_type.get_running_actors() + all_existing_actors.extend(existing_actors) + actors_to_spawn = actor_type.get_actors_to_spawn(queue, existing_actors) + for launch_kwargs in actors_to_spawn: + new_actor_pid = actor_type.start(mode='process', **launch_kwargs) + all_spawned_actors.append(new_actor_pid) + except Exception as err: + print(f'🏃‍♂️ ERROR: {self} Failed to get {actor_type} queue & running actors', err) + except BaseException: + raise + + if not any(queue.exists() for queue in all_queues.values()): + self.on_idle(all_queues) + self.idle_count += 1 + time.sleep(1) + else: + self.idle_count = 0 + + self.on_tick_finished(all_queues, all_existing_actors, all_spawned_actors) + time.sleep(1) + + except BaseException as err: + if isinstance(err, KeyboardInterrupt): + print() + else: + print(f'\n[red]🏃‍♂️ {self}.runloop() FATAL:[/red]', err.__class__.__name__, err) + self.on_shutdown(err=err) + + + +from archivebox.config.django import setup_django + +setup_django() + +from core.models import ArchiveResult, Snapshot + +from django.utils import timezone + +from django import db +from django.db import connection + + +from crawls.actors import CrawlActor +from .actor_snapshot import SnapshotActor + +from abx_plugin_singlefile.actors import SinglefileActor + + +class FaviconActor(ActorType[ArchiveResult]): + CLAIM_ORDER: ClassVar[str] = 
+class FaviconActor(ActorType[ArchiveResult]):
+    CLAIM_ORDER: ClassVar[str] = 'created_at DESC'
+    CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"'
+    CLAIM_SET: ClassVar[str] = 'status = "started"'
+
+    @classproperty
+    def QUERYSET(cls) -> QuerySet:
+        # must select the same rows as CLAIM_WHERE above, or claimed objects will never match
+        return ArchiveResult.objects.filter(status='queued', extractor='favicon')
+
+    def tick(self, obj: ArchiveResult):
+        print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count())
+        updated = ArchiveResult.objects.filter(id=obj.id, status='started').update(status='success') == 1
+        if not updated:
+            raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object')
+        obj.refresh_from_db()
+        obj.save()
+
+
+class ExtractorsOrchestrator(Orchestrator):
+    actor_types = {
+        'CrawlActor': CrawlActor,
+        'SnapshotActor': SnapshotActor,
+        'FaviconActor': FaviconActor,
+        'SinglefileActor': SinglefileActor,
+    }
+
+
+if __name__ == '__main__':
+    orchestrator = ExtractorsOrchestrator()
+    orchestrator.start()
+
+    snap = Snapshot.objects.last()
+    assert snap is not None
+    created = 0
+    while True:
+        time.sleep(0.05)
+        # try:
+        #     ArchiveResult.objects.bulk_create([
+        #         ArchiveResult(
+        #             id=uuid.uuid4(),
+        #             snapshot=snap,
+        #             status='failed',
+        #             extractor='favicon',
+        #             cmd=['echo', '"hello"'],
+        #             cmd_version='1.0',
+        #             pwd='.',
+        #             start_ts=timezone.now(),
+        #             end_ts=timezone.now(),
+        #             created_at=timezone.now(),
+        #             modified_at=timezone.now(),
+        #             created_by_id=1,
+        #         )
+        #         for _ in range(100)
+        #     ])
+        #     created += 100
+        #     if created % 1000 == 0:
+        #         print(f'[blue]Created {created} ArchiveResults...[/blue]')
+        #         time.sleep(25)
+        # except Exception as err:
+        #     print(err)
+        #     db.connections.close_all()
+        # except BaseException as err:
+        #     print(err)
+        #     break
diff --git a/archivebox/actors/statemachine.py b/archivebox/actors/statemachine.py
new file mode 100644
index 00000000..53883120
--- /dev/null
+++ b/archivebox/actors/statemachine.py
@@ -0,0 +1,286 @@
+# NOTE: design prototype: helpers like cleanup_snapshot_dir(), execute_extractor(),
+# update_snapshot_index_json(), etc. are assumed to be defined elsewhere.
+
+import os
+import time
+from datetime import timedelta
+from multiprocessing import Process
+
+import abx
+import psutil
+from huey import crontab
+from huey.contrib import djhuey
+from statemachine import State, StateMachine
+
+from django.db import models, transaction
+from django.db.models import Q
+from django.utils import timezone
+
+# State Machine Definitions
+#################################################
+
+class SnapshotMachine(StateMachine):
+    """State machine for managing Snapshot lifecycle."""
+
+    # States
+    queued = State(initial=True)
+    started = State()
+    sealed = State(final=True)
+
+    # Transitions
+    start = queued.to(started, cond='can_start')
+    seal = started.to(sealed, cond='is_finished')
+
+    # Events
+    tick = (
+        queued.to.itself(unless='can_start') |
+        queued.to(started, cond='can_start') |
+        started.to.itself(unless='is_finished') |
+        started.to(sealed, cond='is_finished')
+    )
+
+    def __init__(self, snapshot):
+        self.snapshot = snapshot
+        super().__init__()
+
+    def can_start(self):
+        return True
+
+    def is_finished(self):
+        return not self.snapshot.has_pending_archiveresults()
+
+    def before_start(self):
+        """Pre-start validation and setup."""
+        self.snapshot.cleanup_dir()
+
+    def after_start(self):
+        """Post-start side effects."""
+        self.snapshot.create_pending_archiveresults()
+        self.snapshot.update_indices()
+        self.snapshot.bump_retry_at(seconds=10)
+
+    def before_seal(self):
+        """Pre-seal validation and cleanup."""
+        self.snapshot.cleanup_dir()
+
+    def after_seal(self):
+        """Post-seal actions."""
+        self.snapshot.update_indices()
+        self.snapshot.seal_dir()
+        self.snapshot.upload_dir()
+        self.snapshot.retry_at = None
+        self.snapshot.save()
+
+
+class ArchiveResultMachine(StateMachine):
+    """State machine for managing ArchiveResult lifecycle."""
+
+    # States
+    queued = State(initial=True)
+    started = State()
+    succeeded = State(final=True)
+    backoff = State()
+    failed = State(final=True)
+
+    # Transitions
+    # (the started->backoff event is named 'defer' so it doesn't shadow/clobber the backoff State above)
+    start = queued.to(started, cond='can_start')
+    succeed = started.to(succeeded, cond='extractor_succeeded')
+    defer = started.to(backoff, unless='extractor_succeeded')
+    retry = backoff.to(queued, cond='can_retry')
+    fail = backoff.to(failed, unless='can_retry')
+
+    # Events
+    tick = (
+        queued.to.itself(unless='can_start') |
+        queued.to(started, cond='can_start') |
+        started.to.itself(cond='extractor_still_running') |
+        started.to(succeeded, cond='extractor_succeeded') |
+        started.to(backoff, unless='extractor_succeeded') |
+        backoff.to.itself(cond='still_waiting_to_retry') |
+        backoff.to(queued, cond='can_retry') |
+        backoff.to(failed, unless='can_retry')
+    )
+
+    def __init__(self, archiveresult):
+        self.archiveresult = archiveresult
+        super().__init__()
+
+    def can_start(self):
+        return True
+
+    def extractor_still_running(self):
+        return self.archiveresult.start_ts > timezone.now() - timedelta(seconds=5)
+
+    def extractor_succeeded(self):
+        # return check_if_extractor_succeeded(self.archiveresult)
+        return self.archiveresult.start_ts < timezone.now() - timedelta(seconds=5)
+
+    def can_retry(self):
+        return self.archiveresult.retries < self.archiveresult.max_retries
+
+    def still_waiting_to_retry(self):
+        # (this cond was referenced by tick above but never defined in the original sketch)
+        return self.archiveresult.retry_at and self.archiveresult.retry_at > timezone.now()
+
+    def before_start(self):
+        """Pre-start initialization."""
+        self.archiveresult.retries += 1
+        self.archiveresult.start_ts = timezone.now()
+        self.archiveresult.output = None
+        self.archiveresult.error = None
+
+    def after_start(self):
+        """Post-start execution."""
+        self.archiveresult.bump_retry_at(seconds=self.archiveresult.timeout + 5)
+        execute_extractor(self.archiveresult)
+        self.archiveresult.snapshot.bump_retry_at(seconds=5)
+
+    def before_succeed(self):
+        """Pre-success validation."""
+        self.archiveresult.output = get_archiveresult_output(self.archiveresult)
+
+    def after_succeed(self):
+        """Post-success cleanup."""
+        self.archiveresult.end_ts = timezone.now()
+        self.archiveresult.retry_at = None
+        self.archiveresult.update_indices()
+
+    def before_defer(self):
+        """Pre-backoff error capture."""
+        self.archiveresult.error = get_archiveresult_error(self.archiveresult)
+
+    def after_defer(self):
+        """Post-backoff retry scheduling."""
+        self.archiveresult.end_ts = timezone.now()
+        self.archiveresult.bump_retry_at(
+            seconds=self.archiveresult.timeout * self.archiveresult.retries
+        )
+        self.archiveresult.update_indices()
+
+    def before_fail(self):
+        """Pre-failure finalization."""
+        self.archiveresult.retry_at = None
+
+    def after_fail(self):
+        """Post-failure cleanup."""
+        self.archiveresult.update_indices()
+
+# Models
+#################################################
+
+class Snapshot(models.Model):
+    status = models.CharField(max_length=32, default='queued')
+    retry_at = models.DateTimeField(null=True)
+
+    def get_machine(self):
+        """Get the state machine for this snapshot (a method, to match ArchiveResult.get_machine() and BaseActor.tick() below)."""
+        return SnapshotMachine(self)
+
+    def has_pending_archiveresults(self):
+        return self.archiveresult_set.exclude(
+            status__in=['succeeded', 'failed']
+        ).exists()
+
+    def bump_retry_at(self, seconds):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+        self.save()
+
+    def cleanup_dir(self):
+        cleanup_snapshot_dir(self)
+
+    def create_pending_archiveresults(self):
+        create_snapshot_pending_archiveresults(self)
+
+    def update_indices(self):
+        update_snapshot_index_json(self)
+        update_snapshot_index_html(self)
+
+    def seal_dir(self):
+        seal_snapshot_dir(self)
+
+    def upload_dir(self):
+        upload_snapshot_dir(self)
+
+
+class ArchiveResult(models.Model):
+    snapshot = models.ForeignKey(Snapshot, on_delete=models.CASCADE)
+    status = models.CharField(max_length=32, default='queued')
+    retry_at = models.DateTimeField(null=True)
+    retries = models.IntegerField(default=0)
+    max_retries = models.IntegerField(default=3)
+    timeout = models.IntegerField(default=60)
+    start_ts = models.DateTimeField(null=True)
+    end_ts = models.DateTimeField(null=True)
+    output = models.TextField(null=True)
+    error = models.TextField(null=True)
+
+    def get_machine(self):
+        return ArchiveResultMachine(self)
+
+    def bump_retry_at(self, seconds):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+        self.save()
+
+    def update_indices(self):
+        update_archiveresult_index_json(self)
+        update_archiveresult_index_html(self)
+
+
+# Actor System
+#################################################
+
+class BaseActor:
+    MAX_TICK_TIME = 60
+
+    def tick(self, obj):
+        """Process a single object through its state machine."""
+        machine = obj.get_machine()
+        # fire the composite tick event and let the machine pick the right transition
+        # for its current state (the original hand-rolled dispatch here called
+        # can_seal()/seal() and retry()/fail() combinations that the machines don't all define)
+        machine.tick()
+
+
+class Orchestrator:
+    """Main orchestrator that manages all actors."""
+
+    def __init__(self):
+        self.pid = None
+
+    @classmethod
+    def spawn(cls):
+        orchestrator = cls()
+        proc = Process(target=orchestrator.runloop)
+        proc.start()
+        return proc.pid
+
+    def runloop(self):
+        self.pid = os.getpid()
+        abx.pm.hook.on_orchestrator_startup(self)
+
+        try:
+            while True:
+                self.process_queue(Snapshot)
+                self.process_queue(ArchiveResult)
+                time.sleep(0.1)
+
+        except (KeyboardInterrupt, SystemExit):
+            abx.pm.hook.on_orchestrator_shutdown(self)
+
+    def process_queue(self, model):
+        retry_at_reached = Q(retry_at__isnull=True) | Q(retry_at__lte=timezone.now())
+        queue = model.objects.filter(retry_at_reached)
+
+        if queue.exists():
+            actor = BaseActor()
+            for obj in queue:
+                try:
+                    with transaction.atomic():
+                        actor.tick(obj)
+                except Exception as e:
+                    abx.pm.hook.on_actor_tick_exception(actor, obj, e)
+
+
+# Periodic Tasks
+#################################################
+
+@djhuey.periodic_task(crontab(minute='*'))
+def ensure_orchestrator_running():
+    """Ensure orchestrator is running, start if not."""
+    # placeholder check: psutil reports OS process names, which won't literally be
+    # 'Orchestrator'; a real implementation needs a pidfile or similar
+    if not any(p.name().startswith('Orchestrator') for p in psutil.process_iter()):
+        Orchestrator.spawn()
diff --git a/archivebox/actors/tests.py b/archivebox/actors/tests.py
new file mode 100644
index 00000000..7ce503c2
--- /dev/null
+++ b/archivebox/actors/tests.py
@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.
diff --git a/archivebox/actors/views.py b/archivebox/actors/views.py
new file mode 100644
index 00000000..91ea44a2
--- /dev/null
+++ b/archivebox/actors/views.py
@@ -0,0 +1,3 @@
+from django.shortcuts import render
+
+# Create your views here.
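Note (editor sketch): the prototype machines in actors/statemachine.py above are driven by explicit events plus a composite tick event. Unlike the core.statemachines versions later in this diff, these prototypes are not bound to the model's status column, so each freshly-constructed machine starts in 'queued'. A rough, illustrative walkthrough under those assumptions:

    # hypothetical usage of the prototype SnapshotMachine above (helpers stubbed out)
    snapshot = Snapshot.objects.create()       # prototype model; begins life as 'queued'
    machine = snapshot.get_machine()

    if machine.can_start():
        machine.start()    # queued -> started: fires before_start() and after_start()

    if machine.is_finished():                  # no pending ArchiveResults remain
        machine.seal()     # started -> sealed: fires before_seal() and after_seal()

Calling machine.tick() instead lets the machine choose whichever of these transitions is currently allowed, which is what BaseActor.tick() relies on.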
diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 1a3f8a7b..8513d682 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -1,4 +1,5 @@ -__package__ = 'archivebox.config' +__package__ = 'config' +__order__ = 200 from .paths import ( PACKAGE_DIR, # noqa @@ -8,35 +9,28 @@ from .paths import ( from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa from .version import VERSION # noqa - -import abx - +# import abx # @abx.hookimpl -# def get_INSTALLED_APPS(): -# return ['config'] +# def get_CONFIG(): +# from .common import ( +# SHELL_CONFIG, +# STORAGE_CONFIG, +# GENERAL_CONFIG, +# SERVER_CONFIG, +# ARCHIVING_CONFIG, +# SEARCH_BACKEND_CONFIG, +# ) +# return { +# 'SHELL_CONFIG': SHELL_CONFIG, +# 'STORAGE_CONFIG': STORAGE_CONFIG, +# 'GENERAL_CONFIG': GENERAL_CONFIG, +# 'SERVER_CONFIG': SERVER_CONFIG, +# 'ARCHIVING_CONFIG': ARCHIVING_CONFIG, +# 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG, +# } - -@abx.hookimpl -def get_CONFIG(): - from .common import ( - SHELL_CONFIG, - STORAGE_CONFIG, - GENERAL_CONFIG, - SERVER_CONFIG, - ARCHIVING_CONFIG, - SEARCH_BACKEND_CONFIG, - ) - return { - 'SHELL_CONFIG': SHELL_CONFIG, - 'STORAGE_CONFIG': STORAGE_CONFIG, - 'GENERAL_CONFIG': GENERAL_CONFIG, - 'SERVER_CONFIG': SERVER_CONFIG, - 'ARCHIVING_CONFIG': ARCHIVING_CONFIG, - 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG, - } - -@abx.hookimpl -def ready(): - for config in get_CONFIG().values(): - config.validate() +# @abx.hookimpl +# def ready(): +# for config in get_CONFIG().values(): +# config.validate() diff --git a/archivebox/config/configfile.py b/archivebox/config/collection.py similarity index 93% rename from archivebox/config/configfile.py rename to archivebox/config/collection.py index c489e114..d0c5a273 100644 --- a/archivebox/config/configfile.py +++ b/archivebox/config/collection.py @@ -9,16 +9,18 @@ from configparser import ConfigParser from benedict import benedict +import archivebox + from archivebox.config.constants import CONSTANTS from archivebox.misc.logging import stderr def get_real_name(key: str) -> str: - """get the current canonical name for a given deprecated config key""" - from django.conf import settings + """get the up-to-date canonical name for a given old alias or current key""" + CONFIGS = archivebox.pm.hook.get_CONFIGS() - for section in settings.CONFIGS.values(): + for section in CONFIGS.values(): try: return section.aliases[key] except KeyError: @@ -115,17 +117,15 @@ def load_config_file() -> Optional[benedict]: def section_for_key(key: str) -> Any: - from django.conf import settings - for config_section in settings.CONFIGS.values(): + for config_section in archivebox.pm.hook.get_CONFIGS().values(): if hasattr(config_section, key): return config_section - return None + raise ValueError(f'No config section found for key: {key}') def write_config_file(config: Dict[str, str]) -> benedict: """load the ini-formatted config file from DATA_DIR/Archivebox.conf""" - import abx.archivebox.reads from archivebox.misc.system import atomic_write CONFIG_HEADER = ( @@ -175,7 +175,7 @@ def write_config_file(config: Dict[str, str]) -> benedict: updated_config = {} try: # validate the updated_config by attempting to re-parse it - updated_config = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()} + updated_config = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()} except BaseException: # lgtm [py/catch-base-exception] # something went horribly wrong, revert to the previous version 
with open(f'{config_path}.bak', 'r', encoding='utf-8') as old: @@ -233,11 +233,11 @@ def load_config(defaults: Dict[str, Any], return benedict(extended_config) def load_all_config(): - import abx.archivebox.reads + import abx flat_config = benedict() - for config_section in abx.archivebox.reads.get_CONFIGS().values(): + for config_section in abx.pm.hook.get_CONFIGS().values(): config_section.__init__() flat_config.update(config_section.model_dump()) diff --git a/archivebox/config/common.py b/archivebox/config/common.py index 15f575f4..ee6c438b 100644 --- a/archivebox/config/common.py +++ b/archivebox/config/common.py @@ -10,7 +10,7 @@ from rich import print from pydantic import Field, field_validator from django.utils.crypto import get_random_string -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from .constants import CONSTANTS from .version import get_COMMIT_HASH, get_BUILD_TIME, VERSION @@ -45,8 +45,6 @@ class ShellConfig(BaseConfigSet): def BUILD_TIME(self) -> str: return get_BUILD_TIME() - # def VERSIONS_AVAILABLE() -> bool # .check_for_update.get_versions_available_on_github(c)}, - # def CAN_UPGRADE() -> bool # .check_for_update.can_upgrade(c)}, SHELL_CONFIG = ShellConfig() diff --git a/archivebox/config/constants.py b/archivebox/config/constants.py index b8019f99..5124384d 100644 --- a/archivebox/config/constants.py +++ b/archivebox/config/constants.py @@ -1,3 +1,15 @@ +""" +Constants are for things that never change at runtime. +(but they can change from run-to-run or machine-to-machine) + +DATA_DIR will never change at runtime, but you can run +archivebox from inside a different DATA_DIR on the same machine. + +This is loaded very early in the archivebox startup flow, so nothing in this file +or imported from this file should import anything from archivebox.config.common, +django, other INSTALLED_APPS, or anything else that is not in a standard library. +""" + __package__ = 'archivebox.config' import re @@ -197,10 +209,12 @@ class ConstantsDict(Mapping): @classmethod def __getitem__(cls, key: str): + # so it behaves like a dict[key] == dict.key or object attr return getattr(cls, key) @classmethod def __benedict__(cls): + # when casting to benedict, only include uppercase keys that don't start with an underscore return benedict({key: value for key, value in cls.__dict__.items() if key.isupper() and not key.startswith('_')}) @classmethod @@ -214,5 +228,6 @@ class ConstantsDict(Mapping): CONSTANTS = ConstantsDict() CONSTANTS_CONFIG = CONSTANTS.__benedict__() -# add all key: values to globals() for easier importing -globals().update(CONSTANTS) +# add all key: values to globals() for easier importing, e.g.: +# from archivebox.config.constants import IS_ROOT, PERSONAS_DIR, ... 
+# globals().update(CONSTANTS) diff --git a/archivebox/config/django.py b/archivebox/config/django.py index eb79ab43..073cd2d4 100644 --- a/archivebox/config/django.py +++ b/archivebox/config/django.py @@ -60,7 +60,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None: return with Progress(transient=True, expand=True, console=STDERR) as INITIAL_STARTUP_PROGRESS: - INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25) + INITIAL_STARTUP_PROGRESS_TASK = INITIAL_STARTUP_PROGRESS.add_task("[green]Loading modules...", total=25, visible=False) from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, SudoPermission @@ -97,7 +97,7 @@ def setup_django(check_db=False, in_memory_db=False) -> None: except Exception as e: bump_startup_progress_bar(advance=1000) - is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version', 'init')) + is_using_meta_cmd = any(ignored_subcommand in sys.argv for ignored_subcommand in ('help', 'version', '--help', '--version')) if not is_using_meta_cmd: # show error message to user only if they're not running a meta command / just trying to get help STDERR.print() diff --git a/archivebox/config/version.py b/archivebox/config/version.py index 26df4592..026bfa64 100644 --- a/archivebox/config/version.py +++ b/archivebox/config/version.py @@ -45,7 +45,7 @@ def detect_installed_version(PACKAGE_DIR: Path=PACKAGE_DIR): @cache def get_COMMIT_HASH() -> Optional[str]: try: - git_dir = PACKAGE_DIR / '../.git' + git_dir = PACKAGE_DIR.parent / '.git' ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1] commit_hash = git_dir.joinpath(ref).read_text().strip() return commit_hash @@ -53,7 +53,7 @@ def get_COMMIT_HASH() -> Optional[str]: pass try: - return list((PACKAGE_DIR / '../.git/refs/heads/').glob('*'))[0].read_text().strip() + return list((PACKAGE_DIR.parent / '.git/refs/heads/').glob('*'))[0].read_text().strip() except Exception: pass @@ -62,8 +62,12 @@ def get_COMMIT_HASH() -> Optional[str]: @cache def get_BUILD_TIME() -> str: if IN_DOCKER: - docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0] - return docker_build_end_time + try: + # if we're in the archivebox official docker image, /VERSION.txt will contain the build time + docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0] + return docker_build_end_time + except Exception: + pass src_last_modified_unix_timestamp = (PACKAGE_DIR / 'README.md').stat().st_mtime return datetime.fromtimestamp(src_last_modified_unix_timestamp).strftime('%Y-%m-%d %H:%M:%S %s') diff --git a/archivebox/config/views.py b/archivebox/config/views.py index db2c7eaa..975ef7ff 100644 --- a/archivebox/config/views.py +++ b/archivebox/config/views.py @@ -14,8 +14,8 @@ from django.utils.html import format_html, mark_safe from admin_data_views.typing import TableContext, ItemContext from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink -import abx.archivebox.reads - +import abx +import archivebox from archivebox.config import CONSTANTS from archivebox.misc.util import parse_date @@ -65,7 +65,7 @@ def obj_to_yaml(obj: Any, indent: int=0) -> str: @render_with_table_view def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: - + FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() assert request.user.is_superuser, 'Must be a superuser to view 
configuration settings.'

     rows = {
@@ -81,12 +81,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:

     relevant_configs = {
         key: val
-        for key, val in settings.FLAT_CONFIG.items()
+        for key, val in FLAT_CONFIG.items()
         if '_BINARY' in key or '_VERSION' in key
     }

-    for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items():
-        plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    for plugin_id, plugin in abx.get_all_plugins().items():
         if not plugin.hooks.get('get_BINARIES'):
             continue
@@ -131,17 +130,16 @@
 @render_with_item_view
 def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:

-    assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
+    assert request.user and request.user.is_superuser, 'Must be a superuser to view configuration settings.'

     binary = None
     plugin = None
-    for plugin_id in abx.archivebox.reads.get_PLUGINS().keys():
-        loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    for plugin_id, plugin_info in abx.get_all_plugins().items():
         try:
-            for loaded_binary in loaded_plugin.hooks.get_BINARIES().values():
+            for loaded_binary in plugin_info['hooks'].get_BINARIES().values():
                 if loaded_binary.name == key:
                     binary = loaded_binary
-                    plugin = loaded_plugin
+                    plugin = plugin_info    # (was the no-op `plugin = plugin`, which left plugin pointing at whichever entry iterated last)
                     # break  # last write wins
         except Exception as e:
             print(e)
@@ -161,7 +159,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
         "name": binary.name,
         "description": binary.abspath,
         "fields": {
-            'plugin': plugin.package,
+            'plugin': plugin['package'],
             'binprovider': binary.loaded_binprovider,
             'abspath': binary.loaded_abspath,
             'version': binary.loaded_version,
@@ -215,9 +213,7 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
             return color
         return 'black'

-    for plugin_id in settings.PLUGINS.keys():
-
-        plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    for plugin_id, plugin in abx.get_all_plugins().items():
         plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
         plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
         plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
@@ -263,7 +259,7 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:

     assert plugin_id, f'Could not find a plugin matching the specified name: {key}'

-    plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
+    plugin = abx.get_plugin(plugin_id)

     return ItemContext(
         slug=key,
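Note (editor sketch): core/__init__.py below now contributes its admin registrations and config sections via abx hookimpls instead of the old abx.archivebox.reads registry. For reference, a minimal pip-installed or user_plugins/ plugin could presumably hook into the same specs; this is a hypothetical example, reusing only the get_CONFIG / get_INSTALLED_APPS hook names that appear throughout this diff:

    # user_plugins/my_plugin/__init__.py  (hypothetical)
    import abx

    @abx.hookimpl
    def get_CONFIG():
        # MY_PLUGIN_CONFIG would be a BaseConfigSet instance, like the sections in config/common.py
        from .config import MY_PLUGIN_CONFIG
        return {'MY_PLUGIN_CONFIG': MY_PLUGIN_CONFIG}

    @abx.hookimpl
    def get_INSTALLED_APPS():
        return ['my_plugin']   # merged into INSTALLED_APPS by core/settings.py via abx.as_list()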
diff --git a/archivebox/core/__init__.py b/archivebox/core/__init__.py
index ac3ec769..9a301977 100644
--- a/archivebox/core/__init__.py
+++ b/archivebox/core/__init__.py
@@ -1,2 +1,31 @@
 __package__ = 'archivebox.core'

+import abx
+
+@abx.hookimpl
+def register_admin(admin_site):
+    """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site"""
+    from core.admin import register_admin
+    register_admin(admin_site)
+
+
+@abx.hookimpl
+def get_CONFIG():
+    from archivebox.config.common import (
+        SHELL_CONFIG,
+        STORAGE_CONFIG,
+        GENERAL_CONFIG,
+        SERVER_CONFIG,
+        ARCHIVING_CONFIG,
+        SEARCH_BACKEND_CONFIG,
+    )
+    return {
+        'SHELL_CONFIG': SHELL_CONFIG,
+        'STORAGE_CONFIG': STORAGE_CONFIG,
+        'GENERAL_CONFIG': GENERAL_CONFIG,
+        'SERVER_CONFIG': SERVER_CONFIG,
+        'ARCHIVING_CONFIG': ARCHIVING_CONFIG,
+        'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG,
+    }
+
diff --git a/archivebox/core/actors.py b/archivebox/core/actors.py
new file mode 100644
index 00000000..30b8245f
--- /dev/null
+++ b/archivebox/core/actors.py
@@ -0,0 +1,73 @@
+__package__ = 'archivebox.core'
+
+from typing import ClassVar
+
+from rich import print
+
+from django.db.models import QuerySet
+from django.utils import timezone
+from datetime import timedelta
+from core.models import Snapshot
+
+from actors.actor import ActorType
+
+
+class SnapshotActor(ActorType[Snapshot]):
+
+    QUERYSET: ClassVar[QuerySet] = Snapshot.objects.filter(status='queued')
+    CLAIM_WHERE: ClassVar[str] = 'status = "queued"'   # the WHERE clause used to filter objects when atomically claiming the next one from the queue
+    CLAIM_SET: ClassVar[str] = 'status = "started"'    # the SET clause used to claim the object during the atomic claim step
+    CLAIM_ORDER: ClassVar[str] = 'created_at DESC'     # the ORDER BY clause used to sort the queue during the atomic claim step
+    CLAIM_FROM_TOP: ClassVar[int] = 50                 # the number of objects at the top of the queue to consider claiming
+
+    # model_type: Type[ModelType]
+    MAX_CONCURRENT_ACTORS: ClassVar[int] = 4           # min 2, max 8, up to 60% of available cpu cores
+    MAX_TICK_TIME: ClassVar[int] = 60                  # maximum duration in seconds to process a single object
+
+    def claim_sql_where(self) -> str:
+        """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """
+        return self.CLAIM_WHERE
+
+    def claim_sql_set(self) -> str:
+        """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """
+        retry_at = timezone.now() + timedelta(seconds=self.MAX_TICK_TIME)
+        # format as 2024-10-31 10:14:33.240903
+        retry_at_str = retry_at.strftime('%Y-%m-%d %H:%M:%S.%f')
+        return f"{self.CLAIM_SET}, retry_at = '{retry_at_str}'"   # the timestamp must be quoted as an SQL string literal
+
+    def claim_sql_order(self) -> str:
+        """override this to implement a custom ORDER BY clause for the atomic claim step e.g. 
"created_at DESC" """ + return self.CLAIM_ORDER + + def claim_from_top(self) -> int: + """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue""" + return self.CLAIM_FROM_TOP + + def tick(self, obj: Snapshot) -> None: + """override this to process the object""" + print(f'[blue]🏃‍♂️ {self}.tick()[/blue]', obj.abid or obj.id) + # For example: + # do_some_task(obj) + # do_something_else(obj) + # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success') + # raise NotImplementedError('tick() must be implemented by the Actor subclass') + + def on_shutdown(self, err: BaseException | None=None) -> None: + print(f'[grey53]🏃‍♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') + # abx.pm.hook.on_actor_shutdown(self) + + def on_tick_start(self, obj: Snapshot) -> None: + # print(f'🏃‍♂️ {self}.on_tick_start()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_start(self, obj_to_process) + # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + pass + + def on_tick_end(self, obj: Snapshot) -> None: + # print(f'🏃‍♂️ {self}.on_tick_end()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + # self.timer.end() + pass + + def on_tick_exception(self, obj: Snapshot, err: BaseException) -> None: + print(f'[red]🏃‍♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) + # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) diff --git a/archivebox/core/admin_archiveresults.py b/archivebox/core/admin_archiveresults.py index aff7b1df..675f5f43 100644 --- a/archivebox/core/admin_archiveresults.py +++ b/archivebox/core/admin_archiveresults.py @@ -8,7 +8,7 @@ from django.utils.html import format_html, mark_safe from django.core.exceptions import ValidationError from django.urls import reverse, resolve from django.utils import timezone -from django.forms import forms +from django_jsonform.forms.fields import JSONFormField from huey_monitor.admin import TaskModel @@ -83,7 +83,7 @@ class ArchiveResultInline(admin.TabularInline): formset.form.base_fields['cmd_version'].initial = '-' formset.form.base_fields['pwd'].initial = str(snapshot.link_dir) formset.form.base_fields['created_by'].initial = request.user - formset.form.base_fields['cmd'] = forms.JSONField(initial=['-']) + formset.form.base_fields['cmd'] = JSONFormField(initial=['-']) formset.form.base_fields['output'].initial = 'Manually recorded cmd output...' 
if obj is not None: diff --git a/archivebox/core/admin_site.py b/archivebox/core/admin_site.py index de92db8c..7aea2cf5 100644 --- a/archivebox/core/admin_site.py +++ b/archivebox/core/admin_site.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.core' from django.contrib import admin -import abx.django.use +import archivebox class ArchiveBoxAdmin(admin.AdminSite): site_header = 'ArchiveBox' @@ -37,6 +37,6 @@ def register_admin_site(): sites.site = archivebox_admin # register all plugins admin classes - abx.django.use.register_admin(archivebox_admin) + archivebox.pm.hook.register_admin(admin_site=archivebox_admin) return archivebox_admin diff --git a/archivebox/core/apps.py b/archivebox/core/apps.py index 870a77f8..b516678f 100644 --- a/archivebox/core/apps.py +++ b/archivebox/core/apps.py @@ -2,7 +2,7 @@ __package__ = 'archivebox.core' from django.apps import AppConfig -import abx +import archivebox class CoreConfig(AppConfig): @@ -10,16 +10,11 @@ class CoreConfig(AppConfig): def ready(self): """Register the archivebox.core.admin_site as the main django admin site""" + from django.conf import settings + archivebox.pm.hook.ready(settings=settings) + from core.admin_site import register_admin_site register_admin_site() - abx.pm.hook.ready() - - -@abx.hookimpl -def register_admin(admin_site): - """Register the core.models views (Snapshot, ArchiveResult, Tag, etc.) with the admin site""" - from core.admin import register_admin - register_admin(admin_site) diff --git a/archivebox/core/models.py b/archivebox/core/models.py index 79776b7f..a3962a6a 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -8,21 +8,25 @@ import os import json from pathlib import Path +from datetime import timedelta from django.db import models from django.utils.functional import cached_property from django.utils.text import slugify +from django.utils import timezone from django.core.cache import cache from django.urls import reverse, reverse_lazy from django.db.models import Case, When, Value, IntegerField from django.contrib import admin from django.conf import settings +from statemachine.mixins import MachineMixin + from archivebox.config import CONSTANTS from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField from queues.tasks import bg_archive_snapshot -# from crawls.models import Crawl +from crawls.models import Crawl # from machine.models import Machine, NetworkInterface from archivebox.misc.system import get_dir_size @@ -152,7 +156,7 @@ class SnapshotManager(models.Manager): return super().get_queryset().prefetch_related('tags', 'archiveresult_set') # .annotate(archiveresult_count=models.Count('archiveresult')).distinct() -class Snapshot(ABIDModel): +class Snapshot(ABIDModel, MachineMixin): abid_prefix = 'snp_' abid_ts_src = 'self.created_at' abid_uri_src = 'self.url' @@ -160,6 +164,17 @@ class Snapshot(ABIDModel): abid_rand_src = 'self.id' abid_drift_allowed = True + state_field_name = 'status' + state_machine_name = 'core.statemachines.SnapshotMachine' + state_machine_attr = 'sm' + + class SnapshotStatus(models.TextChoices): + QUEUED = 'queued', 'Queued' + STARTED = 'started', 'Started' + SEALED = 'sealed', 'Sealed' + + status = models.CharField(max_length=15, default=SnapshotStatus.QUEUED, null=False, blank=False) + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) @@ -171,7 +186,7 @@ class Snapshot(ABIDModel): bookmarked_at = AutoDateTimeField(default=None, null=False, 
editable=True, db_index=True)
     downloaded_at = models.DateTimeField(default=None, null=True, editable=False, db_index=True, blank=True)

-    # crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set')
+    crawl = models.ForeignKey(Crawl, on_delete=models.CASCADE, default=None, null=True, blank=True, related_name='snapshot_set')

     url = models.URLField(unique=True, db_index=True)
     timestamp = models.CharField(max_length=32, unique=True, db_index=True, editable=False)
@@ -396,6 +411,25 @@ class Snapshot(ABIDModel):
             tags_id.append(Tag.objects.get_or_create(name=tag)[0].pk)
         self.tags.clear()
         self.tags.add(*tags_id)
+
+    def has_pending_archiveresults(self) -> bool:
+        pending_statuses = [ArchiveResult.ArchiveResultStatus.QUEUED, ArchiveResult.ArchiveResultStatus.STARTED]
+        pending_archiveresults = self.archiveresult_set.filter(status__in=pending_statuses)
+        return pending_archiveresults.exists()
+
+    def create_pending_archiveresults(self) -> list['ArchiveResult']:
+        archiveresults = []
+        # NOTE: EXTRACTORS is not defined or imported in this module; presumably it is meant
+        # to come from abx.pm.hook.get_EXTRACTORS() (see the TODO in extractors/__init__.py)
+        for extractor in EXTRACTORS:
+            archiveresult, _created = ArchiveResult.objects.get_or_create(
+                snapshot=self,
+                extractor=extractor,
+                status=ArchiveResult.ArchiveResultStatus.QUEUED,
+            )
+            archiveresults.append(archiveresult)
+        return archiveresults
+
+    def bump_retry_at(self, seconds: int = 10):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)

     # def get_storage_dir(self, create=True, symlink=True) -> Path:
@@ -452,6 +486,20 @@ class ArchiveResult(ABIDModel):
     abid_subtype_src = 'self.extractor'
     abid_rand_src = 'self.id'
     abid_drift_allowed = True
+
+    state_field_name = 'status'
+    state_machine_name = 'core.statemachines.ArchiveResultMachine'
+    state_machine_attr = 'sm'
+
+    class ArchiveResultStatus(models.TextChoices):
+        QUEUED = 'queued', 'Queued'
+        STARTED = 'started', 'Started'
+        SUCCEEDED = 'succeeded', 'Succeeded'
+        FAILED = 'failed', 'Failed'
+        SKIPPED = 'skipped', 'Skipped'
+        BACKOFF = 'backoff', 'Waiting to retry'
+
+    status = models.CharField(max_length=15, choices=ArchiveResultStatus.choices, default=ArchiveResultStatus.QUEUED, null=False, blank=False)

     EXTRACTOR_CHOICES = (
         ('htmltotext', 'htmltotext'),
@@ -469,11 +517,7 @@ class ArchiveResult(ABIDModel):
         ('title', 'title'),
         ('wget', 'wget'),
     )
-    STATUS_CHOICES = [
-        ("succeeded", "succeeded"),
-        ("failed", "failed"),
-        ("skipped", "skipped")
-    ]
+
     id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID')
     abid = ABIDField(prefix=abid_prefix)
@@ -491,7 +535,6 @@ class ArchiveResult(ABIDModel):
     output = models.CharField(max_length=1024)
     start_ts = models.DateTimeField(db_index=True)
     end_ts = models.DateTimeField()
-    status = models.CharField(max_length=16, choices=STATUS_CHOICES)

     # the network interface that was used to download this result
     # uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
@@ -552,7 +595,15 @@ class ArchiveResult(ABIDModel):
         return link.canonical_outputs().get(f'{self.extractor}_path')

     def output_exists(self) -> bool:
-        return os.access(self.output_path(), os.R_OK)
+        return os.path.exists(self.output_path())
+
+    def bump_retry_at(self, seconds: int = 10):
+        self.retry_at = timezone.now() + timedelta(seconds=seconds)
+
+    def create_output_dir(self):
+        snap_dir = self.snapshot_dir
+        snap_dir.mkdir(parents=True, exist_ok=True)
+        return snap_dir / self.output_path()

     # def get_storage_dir(self, create=True, symlink=True):
diff --git
a/archivebox/core/settings.py b/archivebox/core/settings.py index 3810954e..e7d673ac 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -9,13 +9,12 @@ from pathlib import Path from django.utils.crypto import get_random_string import abx -import abx.archivebox -import abx.archivebox.reads -import abx.django.use +import archivebox -from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS +from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS # noqa from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa + IS_MIGRATING = 'makemigrations' in sys.argv[:3] or 'migrate' in sys.argv[:3] IS_TESTING = 'test' in sys.argv[:3] or 'PYTEST_CURRENT_TEST' in os.environ IS_SHELL = 'shell' in sys.argv[:3] or 'shell_plus' in sys.argv[:3] @@ -25,45 +24,8 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v ### ArchiveBox Plugin Settings ################################################################################ -PLUGIN_HOOKSPECS = [ - 'abx.django.hookspec', - 'abx.pydantic_pkgr.hookspec', - 'abx.archivebox.hookspec', -] -abx.register_hookspecs(PLUGIN_HOOKSPECS) - -BUILTIN_PLUGIN_DIRS = { - 'archivebox': PACKAGE_DIR, - 'plugins_pkg': PACKAGE_DIR / 'plugins_pkg', - 'plugins_auth': PACKAGE_DIR / 'plugins_auth', - 'plugins_search': PACKAGE_DIR / 'plugins_search', - 'plugins_extractor': PACKAGE_DIR / 'plugins_extractor', -} -USER_PLUGIN_DIRS = { - # 'user_plugins': DATA_DIR / 'user_plugins', -} - -# Discover ArchiveBox plugins -BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS) -PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox') -USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS) -ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS} - -# Load ArchiveBox plugins -PLUGIN_MANAGER = abx.pm -abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS) -PLUGINS = abx.archivebox.reads.get_PLUGINS() - -# Load ArchiveBox config from plugins -CONFIGS = abx.archivebox.reads.get_CONFIGS() -CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG() -BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS() -BINARIES = abx.archivebox.reads.get_BINARIES() -EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS() -SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS() -# REPLAYERS = abx.archivebox.reads.get_REPLAYERS() -# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS() - +ALL_PLUGINS = archivebox.ALL_PLUGINS +LOADED_PLUGINS = archivebox.LOADED_PLUGINS ################################################################################ ### Django Core Settings @@ -102,7 +64,8 @@ INSTALLED_APPS = [ # 'abid_utils', # handles ABID ID creation, handling, and models 'config', # ArchiveBox config settings (loaded as a plugin, don't need to add it here) 'machine', # handles collecting and storing information about the host machine, network interfaces, installed binaries, etc. - 'queues', # handles starting and managing background workers and processes + 'actors', # handles starting and managing background workers and processes (orchestrators and actors) + 'queues', # handles starting and managing background workers and processes (supervisord) 'seeds', # handles Seed model and URL source management 'crawls', # handles Crawl and CrawlSchedule models and management 'personas', # handles Persona and session management @@ -110,7 +73,7 @@ INSTALLED_APPS = [ 'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc. 
    # ArchiveBox plugins
-    *abx.django.use.get_INSTALLED_APPS(),  # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
+    *abx.as_list(abx.pm.hook.get_INSTALLED_APPS()),  # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,

     # 3rd-party apps from PyPI that need to be loaded last
     'admin_data_views',  # handles rendering some convenient automatic read-only views of data in Django admin
@@ -125,6 +88,7 @@ INSTALLED_APPS = [
+

 MIDDLEWARE = [
     'core.middleware.TimezoneMiddleware',
     'django.middleware.security.SecurityMiddleware',
@@ -135,7 +99,7 @@ MIDDLEWARE = [
     'core.middleware.ReverseProxyAuthMiddleware',
     'django.contrib.messages.middleware.MessageMiddleware',
     'core.middleware.CacheControlMiddleware',
-    *abx.django.use.get_MIDDLEWARES(),
+    *abx.as_list(abx.pm.hook.get_MIDDLEWARES()),
 ]
@@ -148,7 +112,7 @@ MIDDLEWARE = [
 AUTHENTICATION_BACKENDS = [
     'django.contrib.auth.backends.RemoteUserBackend',
     'django.contrib.auth.backends.ModelBackend',
-    *abx.django.use.get_AUTHENTICATION_BACKENDS(),
+    *abx.as_list(abx.pm.hook.get_AUTHENTICATION_BACKENDS()),
 ]
@@ -169,7 +133,7 @@ AUTHENTICATION_BACKENDS = [
 STATIC_URL = '/static/'
 TEMPLATES_DIR_NAME = 'templates'

-CUSTOM_TEMPLATES_ENABLED = os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK) and CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir()
+CUSTOM_TEMPLATES_ENABLED = os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK)
 STATICFILES_DIRS = [
     *([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_ENABLED else []),
     # *[
@@ -177,7 +141,7 @@ STATICFILES_DIRS = [
     #     for plugin_dir in PLUGIN_DIRS.values()
     #     if (plugin_dir / 'static').is_dir()
     # ],
-    *abx.django.use.get_STATICFILES_DIRS(),
+    *abx.as_list(abx.pm.hook.get_STATICFILES_DIRS()),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
 ]
@@ -188,7 +152,7 @@ TEMPLATE_DIRS = [
     #     for plugin_dir in PLUGIN_DIRS.values()
     #     if (plugin_dir / 'templates').is_dir()
     # ],
-    *abx.django.use.get_TEMPLATE_DIRS(),
+    *abx.as_list(abx.pm.hook.get_TEMPLATE_DIRS()),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
     str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@@ -228,7 +192,7 @@ SQLITE_CONNECTION_OPTIONS = {
     # https://gcollazo.com/optimal-sqlite-settings-for-django/
     # https://litestream.io/tips/#busy-timeout
     # https://docs.djangoproject.com/en/5.1/ref/databases/#setting-pragma-options
-    "timeout": 5,
+    "timeout": 10,
     "check_same_thread": False,
     "transaction_mode": "IMMEDIATE",
     "init_command": (
@@ -267,7 +231,7 @@ if not IS_GETTING_VERSION_OR_HELP:    # dont create queue.sqlite3 file
     HUEY = {
         "huey_class": "huey.SqliteHuey",
         "filename": CONSTANTS.QUEUE_DATABASE_FILENAME,
-        "name": "system_tasks",
+        "name": "commands",
         "results": True,
         "store_none": True,
         "immediate": False,
@@ -288,11 +252,11 @@
     # https://huey.readthedocs.io/en/latest/contrib.html#setting-things-up
     # https://github.com/gaiacoop/django-huey
     DJANGO_HUEY = {
-        "default": "system_tasks",
+        "default": "commands",
         "queues": {
             HUEY["name"]: HUEY.copy(),
             # more registered here at plugin import-time by BaseQueue.register()
-            **abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME),
+            **abx.as_dict(abx.pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME)),
         },
     }
@@ -517,7 +481,7 @@ ADMIN_DATA_VIEWS = {
             "name": "log",
         },
     },
-    *abx.django.use.get_ADMIN_DATA_VIEWS_URLS(),
+    *abx.as_list(abx.pm.hook.get_ADMIN_DATA_VIEWS_URLS()),
     ],
 }
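Note (editor sketch): the recurring abx.as_list(abx.pm.hook.get_*()) wrapper above replaces the old abx.django.use helpers. A pluggy hook call returns one result per registered plugin, so a hook like get_MIDDLEWARES() yields a list of lists that has to be flattened before Django can use it. The abx internals are not part of this diff, but as_list/as_dict presumably reduce to something like:

    def as_list(results):
        """Flatten pluggy results like [['app1'], ['app2', 'app3']] -> ['app1', 'app2', 'app3'] (illustrative sketch)."""
        return [item for plugin_result in (results or []) for item in plugin_result]

    def as_dict(results):
        """Merge pluggy results like [{'q1': ...}, {'q2': ...}] into one dict (illustrative sketch)."""
        merged = {}
        for plugin_result in (results or []):
            merged.update(plugin_result)
        return merged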
@@ -611,7 +575,4 @@ if DEBUG_REQUESTS_TRACKER:

 # JET_TOKEN = 'some-api-token-here'

-abx.django.use.register_checks()
-# abx.archivebox.reads.register_all_hooks(globals())
-
 # import ipdb; ipdb.set_trace()
diff --git a/archivebox/core/settings_logging.py b/archivebox/core/settings_logging.py
index d9fc28bd..d292e15a 100644
--- a/archivebox/core/settings_logging.py
+++ b/archivebox/core/settings_logging.py
@@ -163,11 +163,6 @@ SETTINGS_LOGGING = {
         "level": "DEBUG",
         "propagate": False,
     },
-    "plugins_extractor": {
-        "handlers": ["default", "logfile"],
-        "level": "DEBUG",
-        "propagate": False,
-    },
     "httpx": {
         "handlers": ["outbound_webhooks"],
         "level": "INFO",
diff --git a/archivebox/core/statemachines.py b/archivebox/core/statemachines.py
new file mode 100644
index 00000000..a2425d43
--- /dev/null
+++ b/archivebox/core/statemachines.py
@@ -0,0 +1,115 @@
+__package__ = 'archivebox.core'    # (was 'archivebox.snapshots', a package that doesn't exist; this file lives in archivebox/core/)
+
+from django.utils import timezone
+
+from statemachine import State, StateMachine
+
+from core.models import Snapshot, ArchiveResult
+
+# State Machine Definitions
+#################################################
+
+
+class SnapshotMachine(StateMachine, strict_states=True):
+    """State machine for managing Snapshot lifecycle."""
+
+    model: Snapshot
+
+    # States
+    queued = State(value=Snapshot.SnapshotStatus.QUEUED, initial=True)
+    started = State(value=Snapshot.SnapshotStatus.STARTED)
+    sealed = State(value=Snapshot.SnapshotStatus.SEALED, final=True)
+
+    # Tick Event
+    tick = (
+        queued.to.itself(unless='can_start', internal=True) |
+        queued.to(started, cond='can_start') |
+        started.to.itself(unless='is_finished', internal=True) |
+        started.to(sealed, cond='is_finished')
+    )
+
+    def __init__(self, snapshot, *args, **kwargs):
+        self.snapshot = snapshot
+        super().__init__(snapshot, *args, **kwargs)
+
+    def can_start(self) -> bool:
+        # Snapshot has no .seed field (its parent Crawl does); a snapshot is startable once it has a URL
+        return bool(self.snapshot.url)
+
+    def is_finished(self) -> bool:
+        return not self.snapshot.has_pending_archiveresults()
+
+    def on_started(self):
+        self.snapshot.create_pending_archiveresults()
+        self.snapshot.bump_retry_at(seconds=60)
+        self.snapshot.save()
+
+    def on_sealed(self):
+        self.snapshot.retry_at = None
+        self.snapshot.save()
+
+class ArchiveResultMachine(StateMachine, strict_states=True):
+    """State machine for managing ArchiveResult lifecycle."""
+
+    model: ArchiveResult
+
+    # States
+    queued = State(value=ArchiveResult.ArchiveResultStatus.QUEUED, initial=True)
+    started = State(value=ArchiveResult.ArchiveResultStatus.STARTED)
+    backoff = State(value=ArchiveResult.ArchiveResultStatus.BACKOFF)
+    succeeded = State(value=ArchiveResult.ArchiveResultStatus.SUCCEEDED, final=True)
+    failed = State(value=ArchiveResult.ArchiveResultStatus.FAILED, final=True)
+
+    # Tick Event
+    tick = (
+        queued.to.itself(unless='can_start', internal=True) |
+        queued.to(started, cond='can_start') |
+        started.to.itself(unless='is_finished', internal=True) |
+        started.to(succeeded, cond='is_succeeded') |
+        started.to(failed, cond='is_failed') |
+        started.to(backoff, cond='is_backoff') |
+        backoff.to.itself(unless='can_start', internal=True) |
+        backoff.to(started, cond='can_start') |
+        backoff.to(succeeded, cond='is_succeeded') |
+        backoff.to(failed, cond='is_failed')
+    )
+
+    def __init__(self, archiveresult, *args, **kwargs):
+        self.archiveresult = archiveresult
+        super().__init__(archiveresult, *args, **kwargs)
+
+    def can_start(self) -> bool:
+        return self.archiveresult.snapshot and 
self.archiveresult.snapshot.is_started() + + def is_succeeded(self) -> bool: + return self.archiveresult.output_exists() + + def is_failed(self) -> bool: + return not self.archiveresult.output_exists() + + def is_backoff(self) -> bool: + return self.archiveresult.status == ArchiveResult.ArchiveResultStatus.BACKOFF + + def on_started(self): + self.archiveresult.start_ts = timezone.now() + self.archiveresult.create_output_dir() + self.archiveresult.bump_retry_at(seconds=60) + self.archiveresult.save() + + def on_backoff(self): + self.archiveresult.bump_retry_at(seconds=60) + self.archiveresult.save() + + def on_succeeded(self): + self.archiveresult.end_ts = timezone.now() + self.archiveresult.save() + + def on_failed(self): + self.archiveresult.end_ts = timezone.now() + self.archiveresult.save() + + def after_transition(self, event: str, source: State, target: State): + print(f"after '{event}' from '{source.id}' to '{target.id}'") + # self.archiveresult.save_merkle_index() + # self.archiveresult.save_html_index() + # self.archiveresult.save_json_index() + return "after_transition" diff --git a/archivebox/core/views.py b/archivebox/core/views.py index d423c146..e425c8fe 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -12,7 +12,6 @@ from django.views import View from django.views.generic.list import ListView from django.views.generic import FormView from django.db.models import Q -from django.conf import settings from django.contrib import messages from django.contrib.auth.mixins import UserPassesTestMixin from django.views.decorators.csrf import csrf_exempt @@ -21,6 +20,7 @@ from django.utils.decorators import method_decorator from admin_data_views.typing import TableContext, ItemContext from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink +import archivebox from core.models import Snapshot from core.forms import AddLinkForm @@ -32,9 +32,8 @@ from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG from archivebox.misc.util import base_url, htmlencode, ts_to_date_str from archivebox.misc.serve_static import serve_static_with_byterange_support -from ..plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG -from ..logging_util import printable_filesize -from ..search import query_search_index +from archivebox.logging_util import printable_filesize +from archivebox.search import query_search_index class HomepageView(View): @@ -69,7 +68,7 @@ class SnapshotView(View): and embed_path and os.access(abs_path, os.R_OK) and abs_path.exists()): - if abs_path.is_dir() and not any(abs_path.glob('*.*')): + if os.path.isdir(abs_path) and not any(abs_path.glob('*.*')): continue result_info = { @@ -103,7 +102,7 @@ class SnapshotView(View): # iterate through all the files in the snapshot dir and add the biggest ones to1 the result list snap_dir = Path(snapshot.link_dir) - assert os.access(snap_dir, os.R_OK) and os.access(snap_dir, os.X_OK) + assert os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK) for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')): extension = result_file.suffix.lstrip('.').lower() @@ -154,7 +153,7 @@ class SnapshotView(View): 'status_color': 'success' if link.is_archived else 'danger', 'oldest_archive_date': ts_to_date_str(link.oldest_archive_date), 'warc_path': warc_path, - 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG, + 'SAVE_ARCHIVE_DOT_ORG': archivebox.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG, 'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS, 
'archiveresults': sorted(archiveresults.values(), key=lambda r: all_types.index(r['name']) if r['name'] in all_types else -r['size']), 'best_result': best_result, @@ -500,21 +499,25 @@ class HealthCheckView(View): def find_config_section(key: str) -> str: + CONFIGS = archivebox.pm.hook.get_CONFIGS() + if key in CONSTANTS_CONFIG: return 'CONSTANT' matching_sections = [ - section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields + section_id for section_id, section in CONFIGS.items() if key in section.model_fields ] section = matching_sections[0] if matching_sections else 'DYNAMIC' return section def find_config_default(key: str) -> str: + CONFIGS = archivebox.pm.hook.get_CONFIGS() + if key in CONSTANTS_CONFIG: return str(CONSTANTS_CONFIG[key]) default_val = None - for config in settings.CONFIGS.values(): + for config in CONFIGS.values(): if key in config.model_fields: default_val = config.model_fields[key].default break @@ -530,7 +533,9 @@ def find_config_default(key: str) -> str: return default_val def find_config_type(key: str) -> str: - for config in settings.CONFIGS.values(): + CONFIGS = archivebox.pm.hook.get_CONFIGS() + + for config in CONFIGS.values(): if hasattr(config, key): type_hints = get_type_hints(config) try: @@ -547,7 +552,8 @@ def key_is_safe(key: str) -> bool: @render_with_table_view def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: - + CONFIGS = archivebox.pm.hook.get_CONFIGS() + assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' rows = { @@ -560,7 +566,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: # "Aliases": [], } - for section_id, section in reversed(list(settings.CONFIGS.items())): + for section_id, section in reversed(list(CONFIGS.items())): for key, field in section.model_fields.items(): rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '') rows['Key'].append(ItemLink(key, key=key)) @@ -570,7 +576,6 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: # rows['Documentation'].append(mark_safe(f'Wiki: {key}')) # rows['Aliases'].append(', '.join(find_config_aliases(key))) - section = 'CONSTANT' for key in CONSTANTS_CONFIG.keys(): rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '') @@ -589,7 +594,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: @render_with_item_view def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: - + CONFIGS = archivebox.pm.hook.get_CONFIGS() + FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() + assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' 
# aliases = USER_CONFIG.get(key, {}).get("aliases", []) @@ -597,7 +604,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont if key in CONSTANTS_CONFIG: section_header = mark_safe(f'[CONSTANTS]   {key}   (read-only, hardcoded by ArchiveBox)') - elif key in settings.FLAT_CONFIG: + elif key in FLAT_CONFIG: section_header = mark_safe(f'data / ArchiveBox.conf   [{find_config_section(key)}]   {key}') else: section_header = mark_safe(f'[DYNAMIC CONFIG]   {key}   (read-only, calculated at runtime)') @@ -613,7 +620,7 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont "fields": { 'Key': key, 'Type': find_config_type(key), - 'Value': settings.FLAT_CONFIG.get(key, settings.CONFIGS.get(key, None)) if key_is_safe(key) else '********', + 'Value': FLAT_CONFIG.get(key, CONFIGS.get(key, None)) if key_is_safe(key) else '********', }, "help_texts": { 'Key': mark_safe(f''' @@ -635,13 +642,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont {find_config_default(key) or '↗️ See in ArchiveBox source code...'}
To change this value, edit data/ArchiveBox.conf or run:

archivebox config --set {key}="{ val.strip("'") if (val := find_config_default(key)) else - (repr(settings.FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'") + (repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'") }"

'''), diff --git a/archivebox/crawls/actors.py b/archivebox/crawls/actors.py new file mode 100644 index 00000000..f159956e --- /dev/null +++ b/archivebox/crawls/actors.py @@ -0,0 +1,69 @@ +__package__ = 'archivebox.crawls' + +from typing import ClassVar + +from rich import print + +from django.db.models import QuerySet + +from crawls.models import Crawl + +from actors.actor import ActorType + + +class CrawlActor(ActorType[Crawl]): + + QUERYSET: ClassVar[QuerySet] = Crawl.objects.filter(status='queued') + CLAIM_WHERE: ClassVar[str] = 'status = "queued"' # the WHERE clause to filter the objects when atomically getting the next object from the queue + CLAIM_SET: ClassVar[str] = 'status = "started"' # the SET clause to claim the object when atomically getting the next object from the queue + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' # the ORDER BY clause to sort the objects with when atomically getting the next object from the queue + CLAIM_FROM_TOP: ClassVar[int] = 50 # the number of objects to consider when atomically getting the next object from the queue + + # model_type: Type[ModelType] + MAX_CONCURRENT_ACTORS: ClassVar[int] = 4 # min 2, max 8, up to 60% of available cpu cores + MAX_TICK_TIME: ClassVar[int] = 60 # maximum duration in seconds to process a single object + + def claim_sql_where(self) -> str: + """override this to implement a custom WHERE clause for the atomic claim step e.g. "status = 'queued' AND locked_by = NULL" """ + return self.CLAIM_WHERE + + def claim_sql_set(self) -> str: + """override this to implement a custom SET clause for the atomic claim step e.g. "status = 'started' AND locked_by = {self.pid}" """ + return self.CLAIM_SET + + def claim_sql_order(self) -> str: + """override this to implement a custom ORDER BY clause for the atomic claim step e.g. 
"created_at DESC" """ + return self.CLAIM_ORDER + + def claim_from_top(self) -> int: + """override this to implement a custom number of objects to consider when atomically claiming the next object from the top of the queue""" + return self.CLAIM_FROM_TOP + + def tick(self, obj: Crawl) -> None: + """override this to process the object""" + print(f'[blue]🏃‍♂️ {self}.tick()[/blue]', obj.abid or obj.id) + # For example: + # do_some_task(obj) + # do_something_else(obj) + # obj._model.objects.filter(pk=obj.pk, status='started').update(status='success') + # raise NotImplementedError('tick() must be implemented by the Actor subclass') + + def on_shutdown(self, err: BaseException | None=None) -> None: + print(f'[grey53]🏃‍♂️ {self}.on_shutdown() SHUTTING DOWN[/grey53]', err or '[green](gracefully)[/green]') + # abx.pm.hook.on_actor_shutdown(self) + + def on_tick_start(self, obj: Crawl) -> None: + # print(f'🏃‍♂️ {self}.on_tick_start()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_start(self, obj_to_process) + # self.timer = TimedProgress(self.MAX_TICK_TIME, prefix=' ') + pass + + def on_tick_end(self, obj: Crawl) -> None: + # print(f'🏃‍♂️ {self}.on_tick_end()', obj.abid or obj.id) + # abx.pm.hook.on_actor_tick_end(self, obj_to_process) + # self.timer.end() + pass + + def on_tick_exception(self, obj: Crawl, err: BaseException) -> None: + print(f'[red]🏃‍♂️ {self}.on_tick_exception()[/red]', obj.abid or obj.id, err) + # abx.pm.hook.on_actor_tick_exception(self, obj_to_process, err) diff --git a/archivebox/crawls/models.py b/archivebox/crawls/models.py index a806d889..ff9e0d0a 100644 --- a/archivebox/crawls/models.py +++ b/archivebox/crawls/models.py @@ -1,13 +1,20 @@ __package__ = 'archivebox.crawls' +from typing import TYPE_CHECKING from django_stubs_ext.db.models import TypedModelMeta +from datetime import timedelta + from django.db import models -from django.db.models import Q from django.core.validators import MaxValueValidator, MinValueValidator from django.conf import settings -from django.utils import timezone from django.urls import reverse_lazy +from django.utils import timezone + +from statemachine.mixins import MachineMixin + +if TYPE_CHECKING: + from core.models import Snapshot from seeds.models import Seed @@ -41,8 +48,9 @@ class CrawlSchedule(ABIDModel, ModelWithHealthStats): """The base crawl that each new scheduled job should copy as a template""" return self.crawl_set.first() + -class Crawl(ABIDModel, ModelWithHealthStats): +class Crawl(ABIDModel, ModelWithHealthStats, MachineMixin): """ A single session of URLs to archive starting from a given Seed and expanding outwards. An "archiving session" so to speak. 
@@ -55,16 +63,29 @@ class Crawl(ABIDModel, ModelWithHealthStats): abid_prefix = 'crl_' abid_ts_src = 'self.created_at' abid_uri_src = 'self.seed.uri' - abid_subtype_src = 'self.persona_id' + abid_subtype_src = 'self.persona' abid_rand_src = 'self.id' abid_drift_allowed = True + + state_field_name = 'status' + state_machine_name = 'crawls.statemachines.CrawlMachine' + state_machine_attr = 'sm' + bind_events_as_methods = True + class CrawlStatus(models.TextChoices): + QUEUED = 'queued', 'Queued' + STARTED = 'started', 'Started' + SEALED = 'sealed', 'Sealed' + + status = models.CharField(choices=CrawlStatus.choices, max_length=15, default=CrawlStatus.QUEUED, null=False, blank=False) + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') abid = ABIDField(prefix=abid_prefix) created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, default=None, null=False, related_name='crawl_set') created_at = AutoDateTimeField(default=None, null=False, db_index=True) modified_at = models.DateTimeField(auto_now=True) + seed = models.ForeignKey(Seed, on_delete=models.PROTECT, related_name='crawl_set', null=False, blank=False) max_depth = models.PositiveSmallIntegerField(default=0, validators=[MinValueValidator(0), MaxValueValidator(4)]) @@ -79,7 +100,7 @@ class Crawl(ABIDModel, ModelWithHealthStats): # schedule = models.JSONField() # config = models.JSONField() - # snapshot_set: models.Manager['Snapshot'] + snapshot_set: models.Manager['Snapshot'] class Meta(TypedModelMeta): @@ -102,6 +123,28 @@ class Crawl(ABIDModel, ModelWithHealthStats): @property def api_docs_url(self) -> str: return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl' + + def has_pending_archiveresults(self) -> bool: + from core.models import ArchiveResult + + pending_statuses = [ArchiveResult.ArchiveResultStatus.QUEUED, ArchiveResult.ArchiveResultStatus.STARTED] + + snapshot_ids = self.snapshot_set.values_list('id', flat=True) + pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, status__in=pending_statuses) + return pending_archiveresults.exists() + + def create_root_snapshot(self) -> 'Snapshot': + from core.models import Snapshot + + root_snapshot, _ = Snapshot.objects.get_or_create( + crawl=self, + url=self.seed.uri, + ) + return root_snapshot + + def bump_retry_at(self, seconds: int = 10): + self.retry_at = timezone.now() + timedelta(seconds=seconds) + self.save() class Outlink(models.Model): diff --git a/archivebox/crawls/statemachines.py b/archivebox/crawls/statemachines.py new file mode 100644 index 00000000..b7e43daf --- /dev/null +++ b/archivebox/crawls/statemachines.py @@ -0,0 +1,48 @@ +__package__ = 'archivebox.crawls' + +from statemachine import State, StateMachine + +from crawls.models import Crawl + +# State Machine Definitions +################################################# + + +class CrawlMachine(StateMachine, strict_states=True): + """State machine for managing Crawl lifecycle.""" + + model: Crawl + + # States + queued = State(value=Crawl.CrawlStatus.QUEUED, initial=True) + started = State(value=Crawl.CrawlStatus.STARTED) + sealed = State(value=Crawl.CrawlStatus.SEALED, final=True) + + # Tick Event + tick = ( + queued.to.itself(unless='can_start', internal=True) | + queued.to(started, cond='can_start') | + started.to.itself(unless='is_finished', internal=True) | + started.to(sealed, cond='is_finished') + ) + + def __init__(self, crawl, *args, **kwargs): + self.crawl = crawl + super().__init__(crawl, 
*args, **kwargs) + + def can_start(self) -> bool: + return self.crawl.seed and self.crawl.seed.uri + + def is_finished(self) -> bool: + return not self.crawl.has_pending_archiveresults() + + + + def on_started(self): + self.crawl.create_root_snapshot() + self.crawl.bump_retry_at(seconds=10) + self.crawl.save() + + def on_sealed(self): + self.crawl.retry_at = None + self.crawl.save() diff --git a/archivebox/extractors/__init__.py b/archivebox/extractors/__init__.py index 07ebb415..42f9d6c7 100644 --- a/archivebox/extractors/__init__.py +++ b/archivebox/extractors/__init__.py @@ -27,43 +27,29 @@ from ..logging_util import ( log_archive_method_finished, ) -from .title import should_save_title, save_title -from .favicon import should_save_favicon, save_favicon -from .wget import should_save_wget, save_wget -from .singlefile import should_save_singlefile, save_singlefile -from .readability import should_save_readability, save_readability -from .mercury import should_save_mercury, save_mercury -from .htmltotext import should_save_htmltotext, save_htmltotext -from .pdf import should_save_pdf, save_pdf -from .screenshot import should_save_screenshot, save_screenshot -from .dom import should_save_dom, save_dom -from .git import should_save_git, save_git -from .media import should_save_media, save_media -from .archive_org import should_save_archive_dot_org, save_archive_dot_org -from .headers import should_save_headers, save_headers - ShouldSaveFunction = Callable[[Link, Optional[Path], Optional[bool]], bool] SaveFunction = Callable[[Link, Optional[Path], int], ArchiveResult] ArchiveMethodEntry = tuple[str, ShouldSaveFunction, SaveFunction] def get_default_archive_methods() -> List[ArchiveMethodEntry]: + # TODO: move to abx.pm.hook.get_EXTRACTORS() return [ - ('favicon', should_save_favicon, save_favicon), - ('headers', should_save_headers, save_headers), - ('singlefile', should_save_singlefile, save_singlefile), - ('pdf', should_save_pdf, save_pdf), - ('screenshot', should_save_screenshot, save_screenshot), - ('dom', should_save_dom, save_dom), - ('wget', should_save_wget, save_wget), - # keep title, readability, and htmltotext below wget and singlefile, as they depend on them - ('title', should_save_title, save_title), - ('readability', should_save_readability, save_readability), - ('mercury', should_save_mercury, save_mercury), - ('htmltotext', should_save_htmltotext, save_htmltotext), - ('git', should_save_git, save_git), - ('media', should_save_media, save_media), - ('archive_org', should_save_archive_dot_org, save_archive_dot_org), + # ('favicon', should_save_favicon, save_favicon), + # ('headers', should_save_headers, save_headers), + # ('singlefile', should_save_singlefile, save_singlefile), + # ('pdf', should_save_pdf, save_pdf), + # ('screenshot', should_save_screenshot, save_screenshot), + # ('dom', should_save_dom, save_dom), + # ('wget', should_save_wget, save_wget), + # # keep title, readability, and htmltotext below wget and singlefile, as they depend on them + # ('title', should_save_title, save_title), + # ('readability', should_save_readability, save_readability), + # ('mercury', should_save_mercury, save_mercury), + # ('htmltotext', should_save_htmltotext, save_htmltotext), + # ('git', should_save_git, save_git), + # ('media', should_save_media, save_media), + # ('archive_org', should_save_archive_dot_org, save_archive_dot_org), ] ARCHIVE_METHODS_INDEXING_PRECEDENCE = [ diff --git a/archivebox/index/html.py b/archivebox/index/html.py index eae93e67..24cad5c0 100644 --- 
a/archivebox/index/html.py +++ b/archivebox/index/html.py @@ -8,6 +8,8 @@ from typing import List, Optional, Iterator, Mapping from django.utils.html import format_html, mark_safe # type: ignore from django.core.cache import cache +import abx + from archivebox.misc.system import atomic_write from archivebox.misc.util import ( enforce_types, @@ -19,7 +21,6 @@ from archivebox.misc.util import ( from archivebox.config import CONSTANTS, DATA_DIR, VERSION from archivebox.config.common import SERVER_CONFIG from archivebox.config.version import get_COMMIT_HASH -from archivebox.plugins_extractor.archivedotorg.config import ARCHIVEDOTORG_CONFIG from .schema import Link from ..logging_util import printable_filesize @@ -79,8 +80,10 @@ def write_html_link_details(link: Link, out_dir: Optional[str]=None) -> None: @enforce_types def link_details_template(link: Link) -> str: - - from ..extractors.wget import wget_output_path + + from abx_plugin_wget_extractor.wget import wget_output_path + + SAVE_ARCHIVE_DOT_ORG = abx.pm.hook.get_FLAT_CONFIG().SAVE_ARCHIVE_DOT_ORG link_info = link._asdict(extended=True) @@ -102,7 +105,7 @@ def link_details_template(link: Link) -> str: 'status': 'archived' if link.is_archived else 'not yet archived', 'status_color': 'success' if link.is_archived else 'danger', 'oldest_archive_date': ts_to_date_str(link.oldest_archive_date), - 'SAVE_ARCHIVE_DOT_ORG': ARCHIVEDOTORG_CONFIG.SAVE_ARCHIVE_DOT_ORG, + 'SAVE_ARCHIVE_DOT_ORG': SAVE_ARCHIVE_DOT_ORG, 'PREVIEW_ORIGINALS': SERVER_CONFIG.PREVIEW_ORIGINALS, }) diff --git a/archivebox/index/json.py b/archivebox/index/json.py index 8671369a..0a484c75 100644 --- a/archivebox/index/json.py +++ b/archivebox/index/json.py @@ -8,6 +8,8 @@ from pathlib import Path from datetime import datetime, timezone from typing import List, Optional, Iterator, Any, Union +import abx + from archivebox.config import VERSION, DATA_DIR, CONSTANTS from archivebox.config.common import SERVER_CONFIG, SHELL_CONFIG @@ -19,8 +21,6 @@ from archivebox.misc.util import enforce_types @enforce_types def generate_json_index_from_links(links: List[Link], with_headers: bool): - from django.conf import settings - MAIN_INDEX_HEADER = { 'info': 'This is an index of site data archived by ArchiveBox: The self-hosted web archive.', 'schema': 'archivebox.index.json', @@ -33,11 +33,10 @@ def generate_json_index_from_links(links: List[Link], with_headers: bool): 'docs': 'https://github.com/ArchiveBox/ArchiveBox/wiki', 'source': 'https://github.com/ArchiveBox/ArchiveBox', 'issues': 'https://github.com/ArchiveBox/ArchiveBox/issues', - 'dependencies': settings.BINARIES, + 'dependencies': dict(abx.pm.hook.get_BINARIES()), }, } - if with_headers: output = { **MAIN_INDEX_HEADER, diff --git a/archivebox/index/schema.py b/archivebox/index/schema.py index a3c0e967..78e80ef9 100644 --- a/archivebox/index/schema.py +++ b/archivebox/index/schema.py @@ -17,9 +17,9 @@ from dataclasses import dataclass, asdict, field, fields from django.utils.functional import cached_property -from archivebox.config import ARCHIVE_DIR, CONSTANTS +import abx -from plugins_extractor.favicon.config import FAVICON_CONFIG +from archivebox.config import ARCHIVE_DIR, CONSTANTS from archivebox.misc.system import get_dir_size from archivebox.misc.util import ts_to_date_str, parse_date @@ -426,7 +426,10 @@ class Link: def canonical_outputs(self) -> Dict[str, Optional[str]]: """predict the expected output paths that should be present after archiving""" - from ..extractors.wget import wget_output_path + from 
abx_plugin_wget.wget import wget_output_path + + FAVICON_CONFIG = abx.pm.hook.get_CONFIGS().favicon + + # TODO: banish this awful duplication from the codebase and import these # from their respective extractor files canonical = { diff --git a/archivebox/machine/models.py b/archivebox/machine/models.py index 229e1d83..7686b73e 100644 --- a/archivebox/machine/models.py +++ b/archivebox/machine/models.py @@ -8,9 +8,10 @@ from django.db import models from django.utils import timezone from django.utils.functional import cached_property -import abx.archivebox.reads +import abx +import archivebox -from abx.archivebox.base_binary import BaseBinary, BaseBinProvider +from pydantic_pkgr import Binary, BinProvider from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats @@ -180,7 +181,7 @@ class NetworkInterface(ABIDModel, ModelWithHealthStats): class InstalledBinaryManager(models.Manager): - def get_from_db_or_cache(self, binary: BaseBinary) -> 'InstalledBinary': + def get_from_db_or_cache(self, binary: Binary) -> 'InstalledBinary': """Get or create an InstalledBinary record for a Binary on the local machine""" global _CURRENT_BINARIES @@ -216,7 +217,7 @@ class InstalledBinaryManager(models.Manager): # if binary was not yet loaded from filesystem, do it now # this is expensive, we have to find its abspath, version, and sha256, but it's necessary # to make sure we have a good, up-to-date record of it in the DB & in-memory cache - binary = binary.load(fresh=True) + binary = archivebox.pm.hook.binary_load(binary=binary, fresh=True) assert binary.loaded_binprovider and binary.loaded_abspath and binary.loaded_version and binary.loaded_sha256, f'Failed to load binary {binary.name} abspath, version, and sha256' @@ -291,8 +292,8 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): if not hasattr(self, 'machine'): self.machine = Machine.objects.current() if not self.binprovider: - all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values()) - binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True) + all_known_binproviders = list(abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values()) + binary = archivebox.pm.hook.binary_load(binary=Binary(name=self.name, binproviders=all_known_binproviders), fresh=True) self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None if not self.abspath: self.abspath = self.BINPROVIDER.get_abspath(self.name) @@ -304,16 +305,16 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): super().clean(*args, **kwargs) @cached_property - def BINARY(self) -> BaseBinary: - for binary in abx.archivebox.reads.get_BINARIES().values(): + def BINARY(self) -> Binary: + for binary in abx.as_dict(archivebox.pm.hook.get_BINARIES()).values(): if binary.name == self.name: return binary raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it') # TODO: we could technically reconstruct it from scratch, but why would we ever want to do that?
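Throughout this file (and main.py below) lookups like abx.as_dict(archivebox.pm.hook.get_BINARIES()) replace the old settings.BINARIES registry. Under pluggy, a hook call returns a list with one result per plugin that implements the hook, so the results need to be merged; a rough sketch of the assumed semantics of abx.as_dict() (the real implementation lives in the vendored abx package, not shown in this diff):

def as_dict(hook_results: list[dict]) -> dict:
    # Assumed semantics, for illustration: merge each plugin's
    # {name: Binary} dict into a single flat mapping.
    merged: dict = {}
    for result in hook_results:  # e.g. [{'curl': CURL_BINARY}, {'git': GIT_BINARY}, ...]
        merged.update(result)
    return merged

If that reading is right, later results win on duplicate keys, making plugin registration order significant, which may be why main.py below iterates over the merged values with reversed(...).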
@cached_property - def BINPROVIDER(self) -> BaseBinProvider: - for binprovider in abx.archivebox.reads.get_BINPROVIDERS().values(): + def BINPROVIDER(self) -> BinProvider: + for binprovider in abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()).values(): if binprovider.name == self.binprovider: return binprovider raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})') @@ -321,7 +322,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): # maybe not a good idea to provide this? Binary in DB is a record of the binary's config # whereas a loaded binary is a not-yet saved instance that may not have the same config # why would we want to load a binary record from the db when it could be freshly loaded? - def load_from_db(self) -> BaseBinary: + def load_from_db(self) -> Binary: # TODO: implement defaults arg in pydantic_pkgr # return self.BINARY.load(defaults={ # 'binprovider': self.BINPROVIDER, @@ -330,7 +331,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): # 'sha256': self.sha256, # }) - return BaseBinary.model_validate({ + return Binary.model_validate({ **self.BINARY.model_dump(), 'abspath': self.abspath and Path(self.abspath), 'version': self.version, @@ -340,5 +341,5 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): 'overrides': self.BINARY.overrides, }) - def load_fresh(self) -> BaseBinary: - return self.BINARY.load(fresh=True) + def load_fresh(self) -> Binary: + return archivebox.pm.hook.binary_load(binary=self.BINARY, fresh=True) diff --git a/archivebox/main.py b/archivebox/main.py index a3db809f..9ce0b9bd 100755 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -14,6 +14,10 @@ from crontab import CronTab, CronSlices from django.db.models import QuerySet from django.utils import timezone +from pydantic_pkgr import Binary + +import abx +import archivebox from archivebox.misc.checks import check_data_folder from archivebox.misc.util import enforce_types # type: ignore from archivebox.misc.system import get_dir_size, dedupe_cron_jobs, CRON_COMMENT @@ -22,7 +26,7 @@ from archivebox.misc.logging import stderr, hint from archivebox.config import CONSTANTS, VERSION, DATA_DIR, ARCHIVE_DIR from archivebox.config.common import SHELL_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG, SERVER_CONFIG, ARCHIVING_CONFIG from archivebox.config.permissions import SudoPermission, IN_DOCKER -from archivebox.config.configfile import ( +from archivebox.config.collection import ( write_config_file, load_all_config, get_real_name, @@ -195,15 +199,13 @@ def version(quiet: bool=False, console = Console() prnt = console.print - from django.conf import settings - - from abx.archivebox.base_binary import BaseBinary, apt, brew, env + from abx_plugin_default_binproviders import apt, brew, env from archivebox.config.version import get_COMMIT_HASH, get_BUILD_TIME from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP, RUNNING_AS_UID, RUNNING_AS_GID from archivebox.config.paths import get_data_locations, get_code_locations - from plugins_auth.ldap.config import LDAP_CONFIG + LDAP_ENABLED = archivebox.pm.hook.get_SCOPE_CONFIG().LDAP_ENABLED # 0.7.1 @@ -242,7 +244,7 @@ def version(quiet: bool=False, f'SUDO={CONSTANTS.IS_ROOT}', f'ID={CONSTANTS.MACHINE_ID}:{CONSTANTS.COLLECTION_ID}', f'SEARCH_BACKEND={SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE}', - f'LDAP={LDAP_CONFIG.LDAP_ENABLED}', + f'LDAP={LDAP_ENABLED}', #f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # 
add this if we have more useful info to show eventually ) prnt() @@ -264,7 +266,8 @@ def version(quiet: bool=False, prnt('[pale_green1][i] Binary Dependencies:[/pale_green1]') failures = [] - for name, binary in list(settings.BINARIES.items()): + BINARIES = abx.as_dict(archivebox.pm.hook.get_BINARIES()) + for name, binary in list(BINARIES.items()): if binary.name == 'archivebox': continue @@ -295,14 +298,15 @@ def version(quiet: bool=False, prnt() prnt('[gold3][i] Package Managers:[/gold3]') - for name, binprovider in list(settings.BINPROVIDERS.items()): + BINPROVIDERS = abx.as_dict(archivebox.pm.hook.get_BINPROVIDERS()) + for name, binprovider in list(BINPROVIDERS.items()): err = None if binproviders and binprovider.name not in binproviders: continue # TODO: implement a BinProvider.BINARY() method that gets the loaded binary for a binprovider's INSTALLER_BIN - loaded_bin = binprovider.INSTALLER_BINARY or BaseBinary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew]) + loaded_bin = binprovider.INSTALLER_BINARY or Binary(name=binprovider.INSTALLER_BIN, binproviders=[env, apt, brew]) abspath = None if loaded_bin.abspath: @@ -1050,9 +1054,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina # - recommend user re-run with sudo if any deps need to be installed as root from rich import print - from django.conf import settings - from archivebox import CONSTANTS from archivebox.config.permissions import IS_ROOT, ARCHIVEBOX_USER, ARCHIVEBOX_GROUP from archivebox.config.paths import get_or_create_working_lib_dir @@ -1075,11 +1077,11 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina package_manager_names = ', '.join( f'[yellow]{binprovider.name}[/yellow]' - for binprovider in list(settings.BINPROVIDERS.values()) + for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())) if not binproviders or (binproviders and binprovider.name in binproviders) ) print(f'[+] Setting up package managers {package_manager_names}...') - for binprovider in list(settings.BINPROVIDERS.values()): + for binprovider in reversed(list(abx.as_dict(abx.pm.hook.get_BINPROVIDERS()).values())): if binproviders and binprovider.name not in binproviders: continue try: @@ -1092,7 +1094,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina print() - for binary in list(settings.BINARIES.values()): + for binary in reversed(list(abx.as_dict(abx.pm.hook.get_BINARIES()).values())): if binary.name in ('archivebox', 'django', 'sqlite', 'python'): # obviously must already be installed if we are running continue @@ -1122,7 +1124,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina result = binary.install(binproviders=[binprovider_name], dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) sys.stderr.write("\033[00m\n") # reset else: - result = binary.load_or_install(binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) + loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, binproviders=[binprovider_name], fresh=True, dry_run=dry_run, quiet=False) + result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) if result and result['loaded_version']: break except Exception as e: @@ -1133,7 +1136,8 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina 
binary.install(dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) sys.stderr.write("\033[00m\n") # reset else: - binary.load_or_install(fresh=True, dry_run=dry_run).model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) + loaded_binary = archivebox.pm.hook.binary_load_or_install(binary=binary, fresh=True, dry_run=dry_run) + result = loaded_binary.model_dump(exclude={'overrides', 'bin_dir', 'hook_type'}) if IS_ROOT and LIB_DIR: with SudoPermission(uid=0): if ARCHIVEBOX_USER == 0: @@ -1157,7 +1161,7 @@ def install(out_dir: Path=DATA_DIR, binproviders: Optional[List[str]]=None, bina print('\n[green][√] Set up ArchiveBox and its dependencies successfully.[/green]\n', file=sys.stderr) - from plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY + from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY extra_args = [] if binproviders: @@ -1183,8 +1187,6 @@ def config(config_options_str: Optional[str]=None, out_dir: Path=DATA_DIR) -> None: """Get and set your ArchiveBox project configuration values""" - import abx.archivebox.reads - from rich import print check_data_folder() @@ -1198,7 +1200,8 @@ def config(config_options_str: Optional[str]=None, elif config_options_str: config_options = config_options_str.split('\n') - from django.conf import settings + FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() + CONFIGS = archivebox.pm.hook.get_CONFIGS() config_options = config_options or [] @@ -1208,8 +1211,8 @@ def config(config_options_str: Optional[str]=None, if search: if config_options: config_options = [get_real_name(key) for key in config_options] - matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG} - for config_section in settings.CONFIGS.values(): + matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG} + for config_section in CONFIGS.values(): aliases = config_section.aliases for search_key in config_options: @@ -1228,15 +1231,15 @@ def config(config_options_str: Optional[str]=None, elif get or no_args: if config_options: config_options = [get_real_name(key) for key in config_options] - matching_config = {key: settings.FLAT_CONFIG[key] for key in config_options if key in settings.FLAT_CONFIG} - failed_config = [key for key in config_options if key not in settings.FLAT_CONFIG] + matching_config = {key: FLAT_CONFIG[key] for key in config_options if key in FLAT_CONFIG} + failed_config = [key for key in config_options if key not in FLAT_CONFIG] if failed_config: stderr() stderr('[X] These options failed to get', color='red') stderr(' {}'.format('\n '.join(config_options))) raise SystemExit(1) else: - matching_config = settings.FLAT_CONFIG + matching_config = FLAT_CONFIG print(printable_config(matching_config)) raise SystemExit(not matching_config) @@ -1257,20 +1260,20 @@ def config(config_options_str: Optional[str]=None, if key != raw_key: stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow') - if key in settings.FLAT_CONFIG: + if key in FLAT_CONFIG: new_config[key] = val.strip() else: failed_options.append(line) if new_config: - before = settings.FLAT_CONFIG + before = FLAT_CONFIG matching_config = write_config_file(new_config) - after = {**load_all_config(), **abx.archivebox.reads.get_FLAT_CONFIG()} + after = {**load_all_config(), **archivebox.pm.hook.get_FLAT_CONFIG()} print(printable_config(matching_config)) side_effect_changes = {} for key, val in after.items(): - if key in settings.FLAT_CONFIG and 
(str(before[key]) != str(after[key])) and (key not in matching_config): + if key in FLAT_CONFIG and (str(before[key]) != str(after[key])) and (key not in matching_config): side_effect_changes[key] = after[key] # import ipdb; ipdb.set_trace() @@ -1312,7 +1315,7 @@ def schedule(add: bool=False, """Set ArchiveBox to regularly import URLs at specific times using cron""" check_data_folder() - from archivebox.plugins_pkg.pip.binaries import ARCHIVEBOX_BINARY + from abx_plugin_pip.binaries import ARCHIVEBOX_BINARY from archivebox.config.permissions import USER Path(CONSTANTS.LOGS_DIR).mkdir(exist_ok=True) diff --git a/archivebox/misc/checks.py b/archivebox/misc/checks.py index b0322a1e..8a2894fe 100644 --- a/archivebox/misc/checks.py +++ b/archivebox/misc/checks.py @@ -201,6 +201,7 @@ def check_tmp_dir(tmp_dir=None, throw=False, quiet=False, must_exist=True): def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_exist=True): + import archivebox from archivebox.config.permissions import ARCHIVEBOX_USER, ARCHIVEBOX_GROUP from archivebox.misc.logging import STDERR from archivebox.config.paths import dir_is_writable, get_or_create_working_lib_dir @@ -209,6 +210,8 @@ def check_lib_dir(lib_dir: Path | None = None, throw=False, quiet=False, must_ex lib_dir = lib_dir or STORAGE_CONFIG.LIB_DIR + assert lib_dir == archivebox.pm.hook.get_LIB_DIR(), "lib_dir is not the same as the one in the flat config" + if not must_exist and not os.path.isdir(lib_dir): return True diff --git a/archivebox/misc/shell_welcome_message.py b/archivebox/misc/shell_welcome_message.py index 5b85e6bd..26314dc0 100644 --- a/archivebox/misc/shell_welcome_message.py +++ b/archivebox/misc/shell_welcome_message.py @@ -23,7 +23,7 @@ from archivebox import CONSTANTS # noqa from ..main import * # noqa from ..cli import CLI_SUBCOMMANDS -CONFIG = settings.FLAT_CONFIG +CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() CLI_COMMAND_NAMES = ", ".join(CLI_SUBCOMMANDS.keys()) if __name__ == '__main__': @@ -55,6 +55,5 @@ if __name__ == '__main__': prnt(' add[blink][deep_sky_blue4]?[/deep_sky_blue4][/blink] [grey53]# add ? 
after anything to get help[/]') prnt(' add("https://example.com/some/new/url") [grey53]# call CLI methods from the shell[/]') prnt(' snap = Snapshot.objects.filter(url__contains="https://example.com").last() [grey53]# query for individual snapshots[/]') - prnt(' archivebox.plugins_extractor.wget.apps.WGET_EXTRACTOR.extract(snap.id) [grey53]# call an extractor directly[/]') prnt(' snap.archiveresult_set.all() [grey53]# see extractor results[/]') prnt(' bool(re.compile(CONFIG.URL_DENYLIST).search("https://example.com/abc.exe")) [grey53]# test out a config change[/]') diff --git a/archivebox/misc/util.py b/archivebox/misc/util.py index a856fe64..6195252e 100644 --- a/archivebox/misc/util.py +++ b/archivebox/misc/util.py @@ -5,7 +5,7 @@ import requests import json as pyjson import http.cookiejar -from typing import List, Optional, Any +from typing import List, Optional, Any, Callable from pathlib import Path from inspect import signature from functools import wraps @@ -19,14 +19,13 @@ from requests.exceptions import RequestException, ReadTimeout from base32_crockford import encode as base32_encode # type: ignore from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding try: - import chardet + import chardet # type:ignore detect_encoding = lambda rawdata: chardet.detect(rawdata)["encoding"] except ImportError: detect_encoding = lambda rawdata: "utf-8" -from archivebox.config import CONSTANTS -from archivebox.config.common import ARCHIVING_CONFIG +from archivebox.config.constants import CONSTANTS from .logging import COLOR_DICT @@ -126,6 +125,7 @@ def is_static_file(url: str): def enforce_types(func): """ Enforce function arg and kwarg types at runtime using its python3 type hints + Simpler version of pydantic @validate_call decorator """ # TODO: check return type as well @@ -186,11 +186,11 @@ def str_between(string: str, start: str, end: str=None) -> str: @enforce_types -def parse_date(date: Any) -> Optional[datetime]: +def parse_date(date: Any) -> datetime: """Parse unix timestamps, iso format, and human-readable strings""" if date is None: - return None + return None # type: ignore if isinstance(date, datetime): if date.tzinfo is None: @@ -212,6 +212,8 @@ def parse_date(date: Any) -> Optional[datetime]: def download_url(url: str, timeout: int=None) -> str: """Download the contents of a remote url and return the text""" + from archivebox.config.common import ARCHIVING_CONFIG + timeout = timeout or ARCHIVING_CONFIG.TIMEOUT session = requests.Session() @@ -241,8 +243,12 @@ def download_url(url: str, timeout: int=None) -> str: return url.rsplit('/', 1)[-1] @enforce_types -def get_headers(url: str, timeout: int=None) -> str: +def get_headers(url: str, timeout: int | None=None) -> str: """Download the contents of a remote url and return the headers""" + # TODO: get rid of this and use an abx pluggy hook instead + + from archivebox.config.common import ARCHIVING_CONFIG + timeout = timeout or ARCHIVING_CONFIG.TIMEOUT try: @@ -283,6 +289,7 @@ def get_headers(url: str, timeout: int=None) -> str: def ansi_to_html(text: str) -> str: """ Based on: https://stackoverflow.com/questions/19212665/python-converting-ansi-color-codes-to-html + Simple way to render colored CLI stdout/stderr in HTML properly, Textual/rich is probably better though. """ TEMPLATE = '
' @@ -306,13 +313,13 @@ def ansi_to_html(text: str) -> str: @enforce_types def dedupe(options: List[str]) -> List[str]: """ - Deduplicates the given options. Options that come later clobber earlier - conflicting options. + Deduplicates the given CLI args by key=value. Options that come later override earlier. """ deduped = {} for option in options: - deduped[option.split('=')[0]] = option + key = option.split('=')[0] + deduped[key] = option return list(deduped.values()) @@ -344,6 +351,9 @@ class ExtendedEncoder(pyjson.JSONEncoder): elif cls_name in ('dict_items', 'dict_keys', 'dict_values'): return tuple(obj) + + elif isinstance(obj, Callable): + return str(obj) return pyjson.JSONEncoder.default(self, obj) diff --git a/archivebox/parsers/generic_jsonl.py b/archivebox/parsers/generic_jsonl.py index 3af7356b..3948ba18 100644 --- a/archivebox/parsers/generic_jsonl.py +++ b/archivebox/parsers/generic_jsonl.py @@ -1,14 +1,11 @@ __package__ = 'archivebox.parsers' import json - from typing import IO, Iterable -from ..index.schema import Link -from archivebox.misc.util import ( - enforce_types, -) +from archivebox.misc.util import enforce_types +from ..index.schema import Link from .generic_json import jsonObjectToLink def parse_line(line: str): diff --git a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py index 9b88d958..52dbba17 100644 --- a/archivebox/parsers/pocket_api.py +++ b/archivebox/parsers/pocket_api.py @@ -6,8 +6,7 @@ import re from typing import IO, Iterable, Optional from configparser import ConfigParser -from pocket import Pocket - +import archivebox from archivebox.config import CONSTANTS from archivebox.misc.util import enforce_types from archivebox.misc.system import atomic_write @@ -22,7 +21,7 @@ API_DB_PATH = CONSTANTS.SOURCES_DIR / 'pocket_api.db' _BROKEN_PROTOCOL_RE = re.compile('^(http[s]?)(:/(?!/))') -def get_pocket_articles(api: Pocket, since=None, page=0): +def get_pocket_articles(api, since=None, page=0): body, headers = api.get( state='archive', sort='oldest', @@ -94,7 +93,9 @@ def should_parse_as_pocket_api(text: str) -> bool: def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]: """Parse bookmarks from the Pocket API""" - from archivebox.plugins_extractor.pocket.config import POCKET_CONFIG + from pocket import Pocket + + FLAT_CONFIG = archivebox.pm.hook.get_FLAT_CONFIG() input_buffer.seek(0) pattern = re.compile(r"^pocket:\/\/(\w+)") @@ -102,7 +103,7 @@ def parse_pocket_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]: if should_parse_as_pocket_api(line): username = pattern.search(line).group(1) - api = Pocket(POCKET_CONFIG.POCKET_CONSUMER_KEY, POCKET_CONFIG.POCKET_ACCESS_TOKENS[username]) + api = Pocket(FLAT_CONFIG.POCKET_CONSUMER_KEY, FLAT_CONFIG.POCKET_ACCESS_TOKENS[username]) api.last_since = None for article in get_pocket_articles(api, since=read_since(username)): diff --git a/archivebox/parsers/readwise_reader_api.py b/archivebox/parsers/readwise_reader_api.py index ad464537..20a792f3 100644 --- a/archivebox/parsers/readwise_reader_api.py +++ b/archivebox/parsers/readwise_reader_api.py @@ -8,9 +8,10 @@ from datetime import datetime from typing import IO, Iterable, Optional from configparser import ConfigParser +import abx + from archivebox.misc.util import enforce_types from archivebox.misc.system import atomic_write -from archivebox.plugins_extractor.readwise.config import READWISE_CONFIG from ..index.schema import Link @@ -62,26 +63,30 @@ def link_from_article(article: dict, sources: list): def 
write_cursor(username: str, since: str): - if not READWISE_CONFIG.READWISE_DB_PATH.exists(): - atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "") + READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH + + if not READWISE_DB_PATH.exists(): + atomic_write(READWISE_DB_PATH, "") since_file = ConfigParser() since_file.optionxform = str - since_file.read(READWISE_CONFIG.READWISE_DB_PATH) + since_file.read(READWISE_DB_PATH) since_file[username] = {"since": since} - with open(READWISE_CONFIG.READWISE_DB_PATH, "w+") as new: + with open(READWISE_DB_PATH, "w+") as new: since_file.write(new) def read_cursor(username: str) -> Optional[str]: - if not READWISE_CONFIG.READWISE_DB_PATH.exists(): - atomic_write(READWISE_CONFIG.READWISE_DB_PATH, "") + READWISE_DB_PATH = abx.pm.hook.get_CONFIG().READWISE_DB_PATH + + if not READWISE_DB_PATH.exists(): + atomic_write(READWISE_DB_PATH, "") config_file = ConfigParser() config_file.optionxform = str - config_file.read(READWISE_CONFIG.READWISE_DB_PATH) + config_file.read(READWISE_DB_PATH) return config_file.get(username, "since", fallback=None) @@ -97,12 +102,14 @@ def should_parse_as_readwise_reader_api(text: str) -> bool: def parse_readwise_reader_api_export(input_buffer: IO[str], **_kwargs) -> Iterable[Link]: """Parse bookmarks from the Readwise Reader API""" + READWISE_READER_TOKENS = abx.pm.hook.get_CONFIG().READWISE_READER_TOKENS + input_buffer.seek(0) pattern = re.compile(r"^readwise-reader:\/\/(\w+)") for line in input_buffer: if should_parse_as_readwise_reader_api(line): username = pattern.search(line).group(1) - api = ReadwiseReaderAPI(READWISE_CONFIG.READWISE_READER_TOKENS[username], cursor=read_cursor(username)) + api = ReadwiseReaderAPI(READWISE_READER_TOKENS[username], cursor=read_cursor(username)) for article in get_readwise_reader_articles(api): yield link_from_article(article, sources=[line]) diff --git a/archivebox/pkgs/__init__.py b/archivebox/pkgs/__init__.py new file mode 100644 index 00000000..c5f4cc82 --- /dev/null +++ b/archivebox/pkgs/__init__.py @@ -0,0 +1,39 @@ +import sys +import importlib +from pathlib import Path + +PKGS_DIR = Path(__file__).parent + +VENDORED_PKGS = [ + 'abx', + # 'pydantic-pkgr', +] + +# scan ./pkgs and add all dirs present to list of available VENDORED_PKGS +for subdir in reversed(sorted(PKGS_DIR.iterdir())): + if subdir.is_dir() and subdir.name not in VENDORED_PKGS and not subdir.name.startswith('_'): + VENDORED_PKGS.append(subdir.name) + + +def load_vendored_pkgs(): + """Add archivebox/pkgs to sys.path and import all vendored libraries present within""" + if str(PKGS_DIR) not in sys.path: + sys.path.append(str(PKGS_DIR)) + + for pkg_name in VENDORED_PKGS: + pkg_dir = PKGS_DIR / pkg_name + assert pkg_dir.is_dir(), f'Required vendored pkg {pkg_name} could not be found in {pkg_dir}' + + try: + lib = importlib.import_module(pkg_name) + # print(f"Successfully imported lib from environment {pkg_name}") + except ImportError: + sys.path.append(str(pkg_dir)) + try: + lib = importlib.import_module(pkg_name) + # print(f"Successfully imported lib from vendored fallback {pkg_name}: {inspect.getfile(lib)}") + except ImportError as e: + print(f"Failed to import lib from environment or vendored fallback {pkg_name}: {e}", file=sys.stderr) + sys.exit(1) + + diff --git a/archivebox/plugins_pkg/__init__.py b/archivebox/pkgs/abx-plugin-archivedotorg/README.md similarity index 100% rename from archivebox/plugins_pkg/__init__.py rename to archivebox/pkgs/abx-plugin-archivedotorg/README.md diff --git
a/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py new file mode 100644 index 00000000..025d83bf --- /dev/null +++ b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/__init__.py @@ -0,0 +1,21 @@ +__label__ = 'Archive.org' +__homepage__ = 'https://archive.org' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import ARCHIVEDOTORG_CONFIG + + return { + 'ARCHIVEDOTORG_CONFIG': ARCHIVEDOTORG_CONFIG + } + + +# @abx.hookimpl +# def get_EXTRACTORS(): +# from .extractors import ARCHIVEDOTORG_EXTRACTOR +# +# return { +# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR, +# } diff --git a/archivebox/extractors/archive_org.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py similarity index 100% rename from archivebox/extractors/archive_org.py rename to archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/archive_org.py diff --git a/archivebox/plugins_extractor/archivedotorg/config.py b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py similarity index 54% rename from archivebox/plugins_extractor/archivedotorg/config.py rename to archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py index bebb6c98..f4c146ab 100644 --- a/archivebox/plugins_extractor/archivedotorg/config.py +++ b/archivebox/pkgs/abx-plugin-archivedotorg/abx_plugin_archivedotorg/config.py @@ -1,7 +1,4 @@ -__package__ = 'plugins_extractor.archivedotorg' - - -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class ArchivedotorgConfig(BaseConfigSet): diff --git a/archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml b/archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml new file mode 100644 index 00000000..36c91f3c --- /dev/null +++ b/archivebox/pkgs/abx-plugin-archivedotorg/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-archivedotorg" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-plugin-curl>=2024.10.24", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_archivedotorg = "abx_plugin_archivedotorg" diff --git a/archivebox/plugins_search/__init__.py b/archivebox/pkgs/abx-plugin-chrome/README.md similarity index 100% rename from archivebox/plugins_search/__init__.py rename to archivebox/pkgs/abx-plugin-chrome/README.md diff --git a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py new file mode 100644 index 00000000..c300bd13 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/__init__.py @@ -0,0 +1,34 @@ +__label__ = 'Chrome' +__author__ = 'ArchiveBox' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import CHROME_CONFIG + + return { + 'CHROME_CONFIG': CHROME_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import CHROME_BINARY + + return { + 'chrome': CHROME_BINARY, + } + +@abx.hookimpl +def ready(): + from .config import CHROME_CONFIG + CHROME_CONFIG.validate() + + +# @abx.hookimpl +# def get_EXTRACTORS(): +# return { +# 'pdf': PDF_EXTRACTOR, +# 'screenshot': SCREENSHOT_EXTRACTOR, +# 'dom': DOM_EXTRACTOR, +# } diff --git a/archivebox/plugins_extractor/chrome/binaries.py 
b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py similarity index 82% rename from archivebox/plugins_extractor/chrome/binaries.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py index 59573d93..f315c992 100644 --- a/archivebox/plugins_extractor/chrome/binaries.py +++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/binaries.py @@ -1,5 +1,3 @@ -__package__ = 'plugins_extractor.chrome' - import os import platform from pathlib import Path @@ -7,21 +5,22 @@ from typing import List, Optional from pydantic import InstanceOf from pydantic_pkgr import ( + Binary, BinProvider, BinName, BinaryOverrides, bin_abspath, ) -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +import abx -# Depends on Other Plugins: -from archivebox.config.common import SHELL_CONFIG -from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER -from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER +from abx_plugin_default_binproviders import apt, brew, env +from abx_plugin_puppeteer.binproviders import PUPPETEER_BINPROVIDER +from abx_plugin_playwright.binproviders import PLAYWRIGHT_BINPROVIDER from .config import CHROME_CONFIG + CHROMIUM_BINARY_NAMES_LINUX = [ "chromium", "chromium-browser", @@ -48,12 +47,13 @@ CHROME_BINARY_NAMES_MACOS = [ ] CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS -APT_DEPENDENCIES = [ - 'apt-transport-https', 'at-spi2-common', 'chromium-browser', +CHROME_APT_DEPENDENCIES = [ + 'apt-transport-https', 'at-spi2-common', 'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei', 'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2', 'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1', 'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings', + 'chromium-browser', ] @@ -80,7 +80,7 @@ def create_macos_app_symlink(target: Path, shortcut: Path): ###################### Config ########################## -class ChromeBinary(BaseBinary): +class ChromeBinary(Binary): name: BinName = CHROME_CONFIG.CHROME_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [PUPPETEER_BINPROVIDER, env, PLAYWRIGHT_BINPROVIDER, apt, brew] @@ -95,7 +95,7 @@ class ChromeBinary(BaseBinary): 'packages': ['chromium'], # playwright install chromium }, apt.name: { - 'packages': APT_DEPENDENCIES, + 'packages': CHROME_APT_DEPENDENCIES, }, brew.name: { 'packages': ['--cask', 'chromium'] if platform.system().lower() == 'darwin' else [], @@ -104,10 +104,9 @@ class ChromeBinary(BaseBinary): @staticmethod def symlink_to_lib(binary, bin_dir=None) -> None: - from archivebox.config.common import STORAGE_CONFIG - bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin' + bin_dir = bin_dir or abx.pm.hook.get_BIN_DIR() - if not (binary.abspath and os.access(binary.abspath, os.F_OK)): + if not (binary.abspath and os.path.isfile(binary.abspath)): return bin_dir.mkdir(parents=True, exist_ok=True) @@ -121,7 +120,7 @@ class ChromeBinary(BaseBinary): # otherwise on linux we can symlink directly to binary 
executable symlink.unlink(missing_ok=True) symlink.symlink_to(binary.abspath) - except Exception as err: + except Exception: # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') # not actually needed, we can just run without it pass @@ -132,14 +131,17 @@ Cleans up any state or runtime files that chrome leaves behind when killed by a timeout or other error """ - lock_file = Path("~/.config/chromium/SingletonLock").expanduser() - - if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK): - lock_file.unlink() + try: + linux_lock_file = Path("~/.config/chromium/SingletonLock").expanduser() + linux_lock_file.unlink(missing_ok=True) + except Exception: + pass if CHROME_CONFIG.CHROME_USER_DATA_DIR: - if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK): - lock_file.unlink() + try: + (CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock').unlink(missing_ok=True) + except Exception: + pass diff --git a/archivebox/plugins_extractor/chrome/config.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py similarity index 86% rename from archivebox/plugins_extractor/chrome/config.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py index a28c530f..6883cdd1 100644 --- a/archivebox/plugins_extractor/chrome/config.py +++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py @@ -1,5 +1,3 @@ -__package__ = 'plugins_extractor.chrome' - import os from pathlib import Path from typing import List, Optional @@ -7,8 +5,8 @@ from typing import List, Optional from pydantic import Field from pydantic_pkgr import bin_abspath -from abx.archivebox.base_configset import BaseConfigSet -from abx.archivebox.base_binary import env +from abx_spec_config.base_configset import BaseConfigSet +from abx_plugin_default_binproviders import env from archivebox.config import CONSTANTS from archivebox.config.common import ARCHIVING_CONFIG, SHELL_CONFIG @@ -81,15 +79,16 @@ class ChromeConfig(BaseConfigSet): # Chrome Binary CHROME_BINARY: str = Field(default='chrome') CHROME_DEFAULT_ARGS: List[str] = Field(default=[ - '--virtual-time-budget=15000', - '--disable-features=DarkMode', - "--run-all-compositor-stages-before-draw", - "--hide-scrollbars", - "--autoplay-policy=no-user-gesture-required", - "--no-first-run", - "--use-fake-ui-for-media-stream", - "--use-fake-device-for-media-stream", - "--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'", + "--no-first-run", # don't show any first run ui / setup prompts + '--virtual-time-budget=15000', # accelerate any animations on the page by 15s into the future + '--disable-features=DarkMode', # disable dark mode for archiving + "--run-all-compositor-stages-before-draw", # don't draw partially rendered content, wait until everything is ready + "--hide-scrollbars", # hide scrollbars to prevent layout shift / scrollbar visible in screenshots + "--autoplay-policy=no-user-gesture-required", # allow media autoplay without user gesture (e.g.
on mobile) + "--use-fake-ui-for-media-stream", # provide fake camera if site tries to request camera access + "--use-fake-device-for-media-stream", # provide fake camera if site tries to request camera access + "--simulate-outdated-no-au='Tue, 31 Dec 2099 23:59:59 GMT'", # ignore chrome updates + "--force-gpu-mem-available-mb=4096", # allows for longer full page screenshots https://github.com/puppeteer/puppeteer/issues/5530 ]) CHROME_EXTRA_ARGS: List[str] = Field(default=[]) @@ -196,6 +195,7 @@ class ChromeConfig(BaseConfigSet): cmd_args.append('--user-data-dir={}'.format(options.CHROME_USER_DATA_DIR)) cmd_args.append('--profile-directory={}'.format(options.CHROME_PROFILE_NAME or 'Default')) + # if CHROME_USER_DATA_DIR is set but folder is empty, create a new profile inside it if not os.path.isfile(options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME / 'Preferences'): STDERR.print(f'[green] + creating new Chrome profile in: {pretty_path(options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME)}[/green]') cmd_args.remove('--no-first-run') diff --git a/archivebox/extractors/dom.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py similarity index 100% rename from archivebox/extractors/dom.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/dom.py diff --git a/archivebox/extractors/pdf.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py similarity index 100% rename from archivebox/extractors/pdf.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/pdf.py diff --git a/archivebox/extractors/screenshot.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py similarity index 100% rename from archivebox/extractors/screenshot.py rename to archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/screenshot.py diff --git a/archivebox/pkgs/abx-plugin-chrome/pyproject.toml b/archivebox/pkgs/abx-plugin-chrome/pyproject.toml new file mode 100644 index 00000000..da26078d --- /dev/null +++ b/archivebox/pkgs/abx-plugin-chrome/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-chrome" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_chrome = "abx_plugin_chrome" diff --git a/archivebox/pkgs/abx-plugin-curl/README.md b/archivebox/pkgs/abx-plugin-curl/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py new file mode 100644 index 00000000..7988ef5e --- /dev/null +++ b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/__init__.py @@ -0,0 +1,18 @@ +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import CURL_CONFIG + + return { + 'curl': CURL_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import CURL_BINARY + + return { + 'curl': CURL_BINARY, + } diff --git a/archivebox/plugins_extractor/curl/binaries.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py similarity index 57% rename from archivebox/plugins_extractor/curl/binaries.py rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py index 41ff9616..32628248 100644 --- a/archivebox/plugins_extractor/curl/binaries.py +++ b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/binaries.py @@ -1,17 +1,17 @@ 
-__package__ = 'plugins_extractor.curl' +__package__ = 'abx_plugin_curl' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName +from pydantic_pkgr import BinProvider, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx_plugin_default_binproviders import apt, brew, env from .config import CURL_CONFIG -class CurlBinary(BaseBinary): +class CurlBinary(Binary): name: BinName = CURL_CONFIG.CURL_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/archivebox/plugins_extractor/curl/config.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py similarity index 90% rename from archivebox/plugins_extractor/curl/config.py rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py index 14996f66..69f4a637 100644 --- a/archivebox/plugins_extractor/curl/config.py +++ b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/config.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_extractor.curl' +__package__ = 'abx_plugin_curl' from typing import List, Optional from pathlib import Path from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG diff --git a/archivebox/extractors/headers.py b/archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py similarity index 100% rename from archivebox/extractors/headers.py rename to archivebox/pkgs/abx-plugin-curl/abx_plugin_curl/headers.py diff --git a/archivebox/pkgs/abx-plugin-curl/pyproject.toml b/archivebox/pkgs/abx-plugin-curl/pyproject.toml new file mode 100644 index 00000000..f3c6ad55 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-curl/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-curl" +version = "2024.10.24" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_curl = "abx_plugin_curl" diff --git a/archivebox/pkgs/abx-plugin-default-binproviders/README.md b/archivebox/pkgs/abx-plugin-default-binproviders/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py b/archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py new file mode 100644 index 00000000..58dbdac9 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-default-binproviders/abx_plugin_default_binproviders.py @@ -0,0 +1,23 @@ + +import abx + +from typing import Dict + +from pydantic_pkgr import ( + AptProvider, + BrewProvider, + EnvProvider, + BinProvider, +) +apt = APT_BINPROVIDER = AptProvider() +brew = BREW_BINPROVIDER = BrewProvider() +env = ENV_BINPROVIDER = EnvProvider() + + +@abx.hookimpl(tryfirst=True) +def get_BINPROVIDERS() -> Dict[str, BinProvider]: + return { + 'apt': APT_BINPROVIDER, + 'brew': BREW_BINPROVIDER, + 'env': ENV_BINPROVIDER, + } diff --git a/archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml b/archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml new file mode 100644 index 00000000..3f8fec96 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-default-binproviders/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-default-binproviders" +version = "2024.10.24" +description = "Default 
BinProviders for ABX (apt, brew, env)" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic-pkgr>=0.5.4", + "abx-spec-pydantic-pkgr>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_default_binproviders = "abx_plugin_default_binproviders" diff --git a/archivebox/pkgs/abx-plugin-favicon/README.md b/archivebox/pkgs/abx-plugin-favicon/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py new file mode 100644 index 00000000..75004e3d --- /dev/null +++ b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/__init__.py @@ -0,0 +1,29 @@ +__label__ = 'Favicon' +__version__ = '2024.10.24' +__author__ = 'ArchiveBox' +__homepage__ = 'https://github.com/ArchiveBox/archivebox' +__dependencies__ = [ + 'abx>=0.1.0', + 'abx-spec-config>=0.1.0', + 'abx-plugin-curl-extractor>=2024.10.24', +] + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import FAVICON_CONFIG + + return { + 'FAVICON_CONFIG': FAVICON_CONFIG + } + + +# @abx.hookimpl +# def get_EXTRACTORS(): +# from .extractors import FAVICON_EXTRACTOR + +# return { +# 'favicon': FAVICON_EXTRACTOR, +# } diff --git a/archivebox/plugins_extractor/favicon/config.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py similarity index 64% rename from archivebox/plugins_extractor/favicon/config.py rename to archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py index 6073ef87..8b97d758 100644 --- a/archivebox/plugins_extractor/favicon/config.py +++ b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/config.py @@ -1,7 +1,4 @@ -__package__ = 'plugins_extractor.favicon' - - -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class FaviconConfig(BaseConfigSet): diff --git a/archivebox/extractors/favicon.py b/archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py similarity index 100% rename from archivebox/extractors/favicon.py rename to archivebox/pkgs/abx-plugin-favicon/abx_plugin_favicon/favicon.py diff --git a/archivebox/pkgs/abx-plugin-favicon/pyproject.toml b/archivebox/pkgs/abx-plugin-favicon/pyproject.toml new file mode 100644 index 00000000..cad10890 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-favicon/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-favicon" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-plugin-curl>=2024.10.28", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_favicon = "abx_plugin_favicon" diff --git a/archivebox/pkgs/abx-plugin-git/README.md b/archivebox/pkgs/abx-plugin-git/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py new file mode 100644 index 00000000..61c04b9c --- /dev/null +++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/__init__.py @@ -0,0 +1,29 @@ +__package__ = 'abx_plugin_git' +__label__ = 'Git' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import GIT_CONFIG + + return { + 'GIT_CONFIG': GIT_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries 
import GIT_BINARY + + return { + 'git': GIT_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import GIT_EXTRACTOR + + return { + 'git': GIT_EXTRACTOR, + } diff --git a/archivebox/plugins_extractor/git/binaries.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py similarity index 57% rename from archivebox/plugins_extractor/git/binaries.py rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py index 8d990769..f352fd99 100644 --- a/archivebox/plugins_extractor/git/binaries.py +++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/binaries.py @@ -1,17 +1,17 @@ -__package__ = 'plugins_extractor.git' +__package__ = 'abx_plugin_git' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName +from pydantic_pkgr import BinProvider, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx_plugin_default_binproviders import apt, brew, env from .config import GIT_CONFIG -class GitBinary(BaseBinary): +class GitBinary(Binary): name: BinName = GIT_CONFIG.GIT_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/archivebox/plugins_extractor/git/config.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py similarity index 87% rename from archivebox/plugins_extractor/git/config.py rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py index 3d890d62..d8a9ca17 100644 --- a/archivebox/plugins_extractor/git/config.py +++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/config.py @@ -1,10 +1,10 @@ -__package__ = 'plugins_extractor.git' +__package__ = 'abx_plugin_git' from typing import List from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG diff --git a/archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py new file mode 100644 index 00000000..4863d031 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/extractors.py @@ -0,0 +1,15 @@ +__package__ = 'abx_plugin_git' + +# from pathlib import Path + +# from .binaries import GIT_BINARY + + +# class GitExtractor(BaseExtractor): +# name: ExtractorName = 'git' +# binary: str = GIT_BINARY.name + +# def get_output_path(self, snapshot) -> Path | None: +# return snapshot.as_link() / 'git' + +# GIT_EXTRACTOR = GitExtractor() diff --git a/archivebox/extractors/git.py b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py similarity index 95% rename from archivebox/extractors/git.py rename to archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py index 9ac71d3e..128ba0e7 100644 --- a/archivebox/extractors/git.py +++ b/archivebox/pkgs/abx-plugin-git/abx_plugin_git/git.py @@ -16,8 +16,8 @@ from archivebox.misc.util import ( from ..logging_util import TimedProgress from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError -from archivebox.plugins_extractor.git.config import GIT_CONFIG -from archivebox.plugins_extractor.git.binaries import GIT_BINARY +from abx_plugin_git.config import GIT_CONFIG +from abx_plugin_git.binaries import GIT_BINARY def get_output_path(): diff --git a/archivebox/pkgs/abx-plugin-git/pyproject.toml b/archivebox/pkgs/abx-plugin-git/pyproject.toml new file mode 100644 index 00000000..384599b7 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-git/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "abx-plugin-git" +version = "2024.10.28" 
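The GitBinary defined above only declares a name and an ordered provider list; resolution happens when the binary is loaded. A rough usage sketch, assuming the pydantic_pkgr Binary API as it is used elsewhere in this diff (.load() returning the resolved binary with abspath and version populated):

from abx_plugin_git.binaries import GIT_BINARY

git = GIT_BINARY.load()          # tries apt, then brew, then env, in that order
print(git.abspath, git.version)  # e.g. /usr/bin/git 2.43.0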
+description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-plugin-default-binproviders>=2024.10.24", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_git = "abx_plugin_git" diff --git a/archivebox/pkgs/abx-plugin-htmltotext/README.md b/archivebox/pkgs/abx-plugin-htmltotext/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py new file mode 100644 index 00000000..ebbc6800 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/__init__.py @@ -0,0 +1,22 @@ +__package__ = 'abx_plugin_htmltotext' +__label__ = 'HTML-to-Text' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import HTMLTOTEXT_CONFIG + + return { + 'HTMLTOTEXT_CONFIG': HTMLTOTEXT_CONFIG + } + + +# @abx.hookimpl +# def get_EXTRACTORS(): +# from .extractors import FAVICON_EXTRACTOR + +# return { +# 'htmltotext': FAVICON_EXTRACTOR, +# } diff --git a/archivebox/plugins_extractor/htmltotext/config.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py similarity index 52% rename from archivebox/plugins_extractor/htmltotext/config.py rename to archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py index 31b9bff5..bd3aabc6 100644 --- a/archivebox/plugins_extractor/htmltotext/config.py +++ b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/config.py @@ -1,7 +1,4 @@ -__package__ = 'plugins_extractor.htmltotext' - - -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class HtmltotextConfig(BaseConfigSet): diff --git a/archivebox/extractors/htmltotext.py b/archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py similarity index 100% rename from archivebox/extractors/htmltotext.py rename to archivebox/pkgs/abx-plugin-htmltotext/abx_plugin_htmltotext/htmltotext.py diff --git a/archivebox/pkgs/abx-plugin-htmltotext/pyproject.toml b/archivebox/pkgs/abx-plugin-htmltotext/pyproject.toml new file mode 100644 index 00000000..46ebaa46 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-htmltotext/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-plugin-htmltotext" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_htmltotext = "abx_plugin_htmltotext" diff --git a/archivebox/pkgs/abx-plugin-ldap-auth/README.md b/archivebox/pkgs/abx-plugin-ldap-auth/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_auth/ldap/__init__.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py similarity index 68% rename from archivebox/plugins_auth/ldap/__init__.py rename to archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py index 6ba43b90..d4ac6431 100644 --- a/archivebox/plugins_auth/ldap/__init__.py +++ b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/__init__.py @@ -1,36 +1,15 @@ -__package__ = 'plugins_auth.ldap' -__id__ = 'ldap' +__package__ = 'abx_plugin_ldap_auth' __label__ = 'LDAP' -__version__ = 
'2024.10.14' -__author__ = 'ArchiveBox' __homepage__ = 'https://github.com/django-auth-ldap/django-auth-ldap' -__dependencies__ = ['pip'] import abx - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - - - @abx.hookimpl def get_CONFIG(): from .config import LDAP_CONFIG return { - __id__: LDAP_CONFIG + 'LDAP_CONFIG': LDAP_CONFIG } @abx.hookimpl @@ -48,12 +27,12 @@ def create_superuser_from_ldap_user(sender, user=None, ldap_user=None, **kwargs) ArchiveBox requires staff/superuser status to view the admin at all, so we must create a user + set staff and superuser when LDAP authenticates a new person. """ - from django.conf import settings + from .config import LDAP_CONFIG if user is None: return # not authenticated at all - if not user.id and settings.CONFIGS.ldap.LDAP_CREATE_SUPERUSER: + if not user.id and LDAP_CONFIG.LDAP_CREATE_SUPERUSER: user.is_superuser = True # authenticated via LDAP, but user is not set up in DB yet user.is_staff = True @@ -69,9 +48,7 @@ def ready(): LDAP_CONFIG.validate() - from django.conf import settings - - if settings.CONFIGS.ldap.LDAP_ENABLED: + if LDAP_CONFIG.LDAP_ENABLED: # tell django-auth-ldap to call our function when a user is authenticated via LDAP import django_auth_ldap.backend django_auth_ldap.backend.populate_user.connect(create_superuser_from_ldap_user) diff --git a/archivebox/plugins_auth/ldap/binaries.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py similarity index 78% rename from archivebox/plugins_auth/ldap/binaries.py rename to archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py index cc932183..8ea4776d 100644 --- a/archivebox/plugins_auth/ldap/binaries.py +++ b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/binaries.py @@ -1,5 +1,4 @@ -__package__ = 'plugins_auth.ldap' - +__package__ = 'abx_plugin_ldap_auth' import inspect @@ -7,12 +6,10 @@ from typing import List from pathlib import Path from pydantic import InstanceOf -from pydantic_pkgr import BinaryOverrides, SemVer +from pydantic_pkgr import BinaryOverrides, SemVer, Binary, BinProvider - -from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, apt - -from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES +from abx_plugin_default_binproviders import apt +from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES from .config import get_ldap_lib @@ -39,10 +36,10 @@ def get_LDAP_LIB_version(): return LDAP_LIB and SemVer(LDAP_LIB.__version__) -class LdapBinary(BaseBinary): +class LdapBinary(Binary): name: str = 'ldap' description: str = 'LDAP Authentication' - binproviders_supported: List[InstanceOf[BaseBinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt] + binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, apt] overrides: BinaryOverrides = { LIB_PIP_BINPROVIDER.name: { diff --git a/archivebox/plugins_auth/ldap/config.py b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py similarity index 96% rename from archivebox/plugins_auth/ldap/config.py rename to 
archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py index 2094dc68..451c9da8 100644 --- a/archivebox/plugins_auth/ldap/config.py +++ b/archivebox/pkgs/abx-plugin-ldap-auth/abx_plugin_ldap_auth/config.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_auth.ldap' +__package__ = 'abx_plugin_ldap_auth' import sys from typing import Dict, List, Optional -from pydantic import Field, model_validator, computed_field +from pydantic import Field, computed_field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet LDAP_LIB = None LDAP_SEARCH = None diff --git a/archivebox/pkgs/abx-plugin-ldap-auth/pyproject.toml b/archivebox/pkgs/abx-plugin-ldap-auth/pyproject.toml new file mode 100644 index 00000000..a89d0cbc --- /dev/null +++ b/archivebox/pkgs/abx-plugin-ldap-auth/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "abx-plugin-ldap-auth" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-django>=0.1.0", +] + + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + + +[project.entry-points.abx] +abx_plugin_ldap_auth = "abx_plugin_ldap_auth" diff --git a/archivebox/pkgs/abx-plugin-mercury/README.md b/archivebox/pkgs/abx-plugin-mercury/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/__init__.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/__init__.py new file mode 100644 index 00000000..7b6fcfd6 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/__init__.py @@ -0,0 +1,29 @@ +__package__ = 'abx_plugin_mercury' +__label__ = 'Postlight Parser' +__homepage__ = 'https://github.com/postlight/mercury-parser' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import MERCURY_CONFIG + + return { + 'MERCURY_CONFIG': MERCURY_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import MERCURY_BINARY + + return { + 'mercury': MERCURY_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import MERCURY_EXTRACTOR + + return { + 'mercury': MERCURY_EXTRACTOR, + } diff --git a/archivebox/plugins_extractor/mercury/binaries.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py similarity index 78% rename from archivebox/plugins_extractor/mercury/binaries.py rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py index b07055fd..f015a7ca 100644 --- a/archivebox/plugins_extractor/mercury/binaries.py +++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/binaries.py @@ -1,18 +1,18 @@ -__package__ = 'plugins_extractor.mercury' +__package__ = 'abx_plugin_mercury' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath +from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary -from abx.archivebox.base_binary import BaseBinary, env +from abx_plugin_default_binproviders import env -from archivebox.plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER +from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER from .config import MERCURY_CONFIG -class MercuryBinary(BaseBinary): +class MercuryBinary(Binary): name: BinName = MERCURY_CONFIG.MERCURY_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] diff 
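The LDAP plugin wires itself into django-auth-ldap through the populate_user signal instead of subclassing the backend, as shown in the __init__ above. Condensed, the flow looks like this (handler signature copied from the diff; in the real plugin the promotion is also gated on LDAP_CREATE_SUPERUSER):

import django_auth_ldap.backend

def create_superuser_from_ldap_user(sender, user=None, ldap_user=None, **kwargs):
    if user is None:
        return                    # not authenticated at all
    if not user.id:               # authenticated via LDAP but not in the DB yet
        user.is_superuser = True  # staff/superuser is required to reach the admin UI
        user.is_staff = True

django_auth_ldap.backend.populate_user.connect(create_superuser_from_ldap_user)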
--git a/archivebox/plugins_extractor/mercury/config.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/config.py similarity index 90% rename from archivebox/plugins_extractor/mercury/config.py rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/config.py index 49c92b73..00fa82a4 100644 --- a/archivebox/plugins_extractor/mercury/config.py +++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/config.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_extractor.mercury' +__package__ = 'abx_plugin_mercury' from typing import List, Optional from pathlib import Path from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG diff --git a/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py new file mode 100644 index 00000000..36a17f3a --- /dev/null +++ b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/extractors.py @@ -0,0 +1,17 @@ +__package__ = 'abx_plugin_mercury' + +# from pathlib import Path + +# from .binaries import MERCURY_BINARY + + + +# class MercuryExtractor(BaseExtractor): +# name: ExtractorName = 'mercury' +# binary: str = MERCURY_BINARY.name + +# def get_output_path(self, snapshot) -> Path | None: +# return snapshot.link_dir / 'mercury' / 'content.html' + + +# MERCURY_EXTRACTOR = MercuryExtractor() diff --git a/archivebox/extractors/mercury.py b/archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py similarity index 100% rename from archivebox/extractors/mercury.py rename to archivebox/pkgs/abx-plugin-mercury/abx_plugin_mercury/mercury.py diff --git a/archivebox/pkgs/abx-plugin-mercury/pyproject.toml b/archivebox/pkgs/abx-plugin-mercury/pyproject.toml new file mode 100644 index 00000000..c740008b --- /dev/null +++ b/archivebox/pkgs/abx-plugin-mercury/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-plugin-mercury" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_mercury = "abx_plugin_mercury" diff --git a/archivebox/pkgs/abx-plugin-npm/README.md b/archivebox/pkgs/abx-plugin-npm/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_pkg/npm/__init__.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/__init__.py similarity index 58% rename from archivebox/plugins_pkg/npm/__init__.py rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/__init__.py index 921d42e4..d1f56f35 100644 --- a/archivebox/plugins_pkg/npm/__init__.py +++ b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/__init__.py @@ -1,32 +1,15 @@ -__package__ = 'plugins_pkg.npm' -__version__ = '2024.10.14' -__id__ = 'npm' -__label__ = 'npm' +__label__ = 'NPM' __author__ = 'ArchiveBox' __homepage__ = 'https://www.npmjs.com/' import abx -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import NPM_CONFIG - return { - __id__: NPM_CONFIG, + 'NPM_CONFIG': NPM_CONFIG, } @abx.hookimpl diff --git a/archivebox/plugins_pkg/npm/binaries.py 
b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py similarity index 72% rename from archivebox/plugins_pkg/npm/binaries.py rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py index dd9e6214..4f44fc4a 100644 --- a/archivebox/plugins_pkg/npm/binaries.py +++ b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binaries.py @@ -4,14 +4,19 @@ __package__ = 'plugins_pkg.npm' from typing import List from pydantic import InstanceOf +from benedict import benedict -from pydantic_pkgr import BinProvider, BinName, BinaryOverrides +from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides + +from abx_plugin_default_binproviders import get_BINPROVIDERS + +DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS()) +env = DEFAULT_BINPROVIDERS.env +apt = DEFAULT_BINPROVIDERS.apt +brew = DEFAULT_BINPROVIDERS.brew -from abx.archivebox.base_binary import BaseBinary, env, apt, brew - - -class NodeBinary(BaseBinary): +class NodeBinary(Binary): name: BinName = 'node' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] @@ -23,7 +28,7 @@ class NodeBinary(BaseBinary): NODE_BINARY = NodeBinary() -class NpmBinary(BaseBinary): +class NpmBinary(Binary): name: BinName = 'npm' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] @@ -35,7 +40,7 @@ class NpmBinary(BaseBinary): NPM_BINARY = NpmBinary() -class NpxBinary(BaseBinary): +class NpxBinary(Binary): name: BinName = 'npx' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py new file mode 100644 index 00000000..dd56e3a9 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/binproviders.py @@ -0,0 +1,38 @@ +import os +from pathlib import Path +from typing import Optional + +from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName + +import abx + +DEFAULT_LIB_NPM_DIR = Path('/usr/local/share/abx/npm') + +OLD_NODE_BIN_PATH = Path(os.getcwd()) / 'node_modules' / '.bin' +NEW_NODE_BIN_PATH = DEFAULT_LIB_NPM_DIR / 'node_modules' / '.bin' + + +class SystemNpmBinProvider(NpmProvider): + name: BinProviderName = "sys_npm" + + npm_prefix: Optional[Path] = None + + +class LibNpmBinProvider(NpmProvider): + name: BinProviderName = "lib_npm" + PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}' + + npm_prefix: Optional[Path] = DEFAULT_LIB_NPM_DIR + + def setup(self) -> None: + # update paths from config at runtime + LIB_DIR = abx.pm.hook.get_LIB_DIR() + self.npm_prefix = LIB_DIR / 'npm' + self.PATH = f'{LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}' + + super().setup() + + +SYS_NPM_BINPROVIDER = SystemNpmBinProvider() +LIB_NPM_BINPROVIDER = LibNpmBinProvider() +npm = LIB_NPM_BINPROVIDER diff --git a/archivebox/plugins_pkg/npm/config.py b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/config.py similarity index 79% rename from archivebox/plugins_pkg/npm/config.py rename to archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/config.py index f69cfdd2..b937ed27 100644 --- a/archivebox/plugins_pkg/npm/config.py +++ b/archivebox/pkgs/abx-plugin-npm/abx_plugin_npm/config.py @@ -1,7 +1,4 @@ -__package__ = 'plugins_pkg.npm' - - -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config import BaseConfigSet ###################### Config ########################## diff --git a/archivebox/pkgs/abx-plugin-npm/pyproject.toml b/archivebox/pkgs/abx-plugin-npm/pyproject.toml new file mode 100644 index 
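LibNpmBinProvider above defers its real paths to runtime: setup() asks the plugin manager for the configured LIB_DIR and rebuilds npm_prefix and PATH from it, so changing LIB_DIR in config relocates all npm-installed tools. A usage sketch under that assumption:

from abx_plugin_npm.binproviders import LIB_NPM_BINPROVIDER

LIB_NPM_BINPROVIDER.setup()                    # re-derives npm_prefix via abx.pm.hook.get_LIB_DIR()
print(LIB_NPM_BINPROVIDER.npm_prefix)          # e.g. <LIB_DIR>/npm
print(LIB_NPM_BINPROVIDER.PATH.split(':')[0])  # <LIB_DIR>/npm/node_modules/.bin is searched first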
00000000..1371b2c4 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-npm/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "abx-plugin-npm" +version = "2024.10.24" +description = "NPM binary provider plugin for ABX" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic-pkgr>=0.5.4", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-plugin-default-binproviders>=2024.10.24", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_npm = "abx_plugin_npm" diff --git a/archivebox/pkgs/abx-plugin-pip/README.md b/archivebox/pkgs/abx-plugin-pip/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_pkg/pip/.plugin_order b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order similarity index 100% rename from archivebox/plugins_pkg/pip/.plugin_order rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/.plugin_order diff --git a/archivebox/plugins_pkg/pip/__init__.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py similarity index 62% rename from archivebox/plugins_pkg/pip/__init__.py rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py index c1be27b1..eebcdb5b 100644 --- a/archivebox/plugins_pkg/pip/__init__.py +++ b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/__init__.py @@ -1,33 +1,18 @@ -__package__ = 'plugins_pkg.pip' -__label__ = 'pip' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/pypa/pip' +__package__ = 'abx_plugin_pip' +__label__ = 'PIP' import abx -@abx.hookimpl -def get_PLUGIN(): - return { - 'pip': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import PIP_CONFIG return { - 'pip': PIP_CONFIG + 'PIP_CONFIG': PIP_CONFIG } -@abx.hookimpl +@abx.hookimpl(tryfirst=True) def get_BINARIES(): from .binaries import ARCHIVEBOX_BINARY, PYTHON_BINARY, DJANGO_BINARY, SQLITE_BINARY, PIP_BINARY, PIPX_BINARY diff --git a/archivebox/plugins_pkg/pip/binaries.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py similarity index 84% rename from archivebox/plugins_pkg/pip/binaries.py rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py index 3e451cfe..18e5f34f 100644 --- a/archivebox/plugins_pkg/pip/binaries.py +++ b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binaries.py @@ -1,4 +1,4 @@ -__package__ = 'plugins_pkg.pip' +__package__ = 'abx_plugin_pip' import sys from pathlib import Path @@ -9,29 +9,30 @@ from pydantic import InstanceOf, Field, model_validator import django import django.db.backends.sqlite3.base from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type] -from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, SemVer +from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer -from archivebox import VERSION -from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew - -from archivebox.misc.logging import hint - -from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER +from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew ###################### Config ########################## +def get_archivebox_version(): + try: + from archivebox import VERSION + return VERSION + except Exception: + return None -class 
ArchiveboxBinary(BaseBinary): +class ArchiveboxBinary(Binary): name: BinName = 'archivebox' binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] overrides: BinaryOverrides = { - VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION}, - SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION}, - apt.name: {'packages': [], 'version': VERSION}, - brew.name: {'packages': [], 'version': VERSION}, + VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version}, + SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version}, + apt.name: {'packages': [], 'version': get_archivebox_version}, + brew.name: {'packages': [], 'version': get_archivebox_version}, } # @validate_call @@ -45,7 +46,7 @@ class ArchiveboxBinary(BaseBinary): ARCHIVEBOX_BINARY = ArchiveboxBinary() -class PythonBinary(BaseBinary): +class PythonBinary(Binary): name: BinName = 'python' binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] @@ -71,9 +72,9 @@ LOADED_SQLITE_PATH = Path(django.db.backends.sqlite3.base.__file__) LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version) LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) -class SqliteBinary(BaseBinary): +class SqliteBinary(Binary): name: BinName = 'sqlite' - binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) + binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) overrides: BinaryOverrides = { VENV_PIP_BINPROVIDER.name: { "abspath": LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None, @@ -93,10 +94,10 @@ class SqliteBinary(BaseBinary): cursor.execute('SELECT JSON(\'{"a": "b"}\')') except django_sqlite3.OperationalError as exc: print(f'[red][X] Your SQLite3 version is missing the required JSON1 extension: {exc}[/red]') - hint([ - 'Upgrade your Python version or install the extension manually:', - 'https://code.djangoproject.com/wiki/JSON1Extension' - ]) + print( + '[violet]Hint:[/violet] Upgrade your Python version or install the extension manually:\n' + + ' https://code.djangoproject.com/wiki/JSON1Extension\n' + ) return self # @validate_call @@ -114,10 +115,10 @@ LOADED_DJANGO_PATH = Path(django.__file__) LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3]) LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv and VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve())) -class DjangoBinary(BaseBinary): +class DjangoBinary(Binary): name: BinName = 'django' - binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) + binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER]) overrides: BinaryOverrides = { VENV_PIP_BINPROVIDER.name: { "abspath": LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None, @@ -139,7 +140,7 @@ class DjangoBinary(BaseBinary): DJANGO_BINARY = DjangoBinary() -class PipBinary(BaseBinary): +class PipBinary(Binary): name: BinName = "pip" binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] @@ -154,7 +155,7 @@ class PipBinary(BaseBinary): PIP_BINARY = PipBinary() -class PipxBinary(BaseBinary): +class 
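# ArchiveboxBinary above replaces the static VERSION import with a callable override, so the
# version lookup (and the archivebox import it needs) only runs when the binary is actually
# inspected. The same lazy-override idea with a hypothetical binary -- MyToolBinary, mytool,
# and get_mytool_version are illustrations, not part of this diff:
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import Binary, BinName, BinProvider, BinaryOverrides
from abx_plugin_default_binproviders import env

def get_mytool_version():
    try:
        from mytool import VERSION   # hypothetical package; import deferred until needed
        return VERSION
    except Exception:
        return None

class MyToolBinary(Binary):
    name: BinName = 'mytool'
    binproviders_supported: List[InstanceOf[BinProvider]] = [env]
    overrides: BinaryOverrides = {
        env.name: {'version': get_mytool_version},   # callable resolved lazily at load time
    }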
PipxBinary(Binary): name: BinName = "pipx" binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] diff --git a/archivebox/plugins_pkg/pip/binproviders.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py similarity index 76% rename from archivebox/plugins_pkg/pip/binproviders.py rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py index e51dc780..c29798b0 100644 --- a/archivebox/plugins_pkg/pip/binproviders.py +++ b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/binproviders.py @@ -1,21 +1,26 @@ -__package__ = 'plugins_pkg.pip' - import os import sys import site from pathlib import Path from typing import Optional +from benedict import benedict + from pydantic_pkgr import PipProvider, BinName, BinProviderName -from archivebox.config import CONSTANTS +import abx -from abx.archivebox.base_binary import BaseBinProvider +from abx_plugin_default_binproviders import get_BINPROVIDERS + +DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS()) +env = DEFAULT_BINPROVIDERS.env +apt = DEFAULT_BINPROVIDERS.apt +brew = DEFAULT_BINPROVIDERS.brew ###################### Config ########################## -class SystemPipBinProvider(PipProvider, BaseBinProvider): +class SystemPipBinProvider(PipProvider): name: BinProviderName = "sys_pip" INSTALLER_BIN: BinName = "pip" @@ -25,7 +30,7 @@ class SystemPipBinProvider(PipProvider, BaseBinProvider): # never modify system pip packages return 'refusing to install packages globally with system pip, use a venv instead' -class SystemPipxBinProvider(PipProvider, BaseBinProvider): +class SystemPipxBinProvider(PipProvider): name: BinProviderName = "pipx" INSTALLER_BIN: BinName = "pipx" @@ -34,7 +39,7 @@ class SystemPipxBinProvider(PipProvider, BaseBinProvider): IS_INSIDE_VENV = sys.prefix != sys.base_prefix -class VenvPipBinProvider(PipProvider, BaseBinProvider): +class VenvPipBinProvider(PipProvider): name: BinProviderName = "venv_pip" INSTALLER_BIN: BinName = "pip" @@ -45,18 +50,16 @@ class VenvPipBinProvider(PipProvider, BaseBinProvider): return None -class LibPipBinProvider(PipProvider, BaseBinProvider): +class LibPipBinProvider(PipProvider): name: BinProviderName = "lib_pip" INSTALLER_BIN: BinName = "pip" - pip_venv: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'pip' / 'venv' + pip_venv: Optional[Path] = Path('/usr/local/share/abx/pip/venv') def setup(self) -> None: - # update paths from config if they arent the default - from archivebox.config.common import STORAGE_CONFIG - if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR: - self.pip_venv = STORAGE_CONFIG.LIB_DIR / 'pip' / 'venv' - + # update venv path to match most up-to-date LIB_DIR based on runtime config + LIB_DIR = abx.pm.hook.get_LIB_DIR() + self.pip_venv = LIB_DIR / 'pip' / 'venv' super().setup() SYS_PIP_BINPROVIDER = SystemPipBinProvider() diff --git a/archivebox/plugins_pkg/pip/config.py b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/config.py similarity index 86% rename from archivebox/plugins_pkg/pip/config.py rename to archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/config.py index 26cf0f8e..f7464810 100644 --- a/archivebox/plugins_pkg/pip/config.py +++ b/archivebox/pkgs/abx-plugin-pip/abx_plugin_pip/config.py @@ -3,7 +3,7 @@ __package__ = 'pip' from typing import List, Optional from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet class PipDependencyConfigs(BaseConfigSet): diff --git 
a/archivebox/pkgs/abx-plugin-pip/pyproject.toml b/archivebox/pkgs/abx-plugin-pip/pyproject.toml new file mode 100644 index 00000000..03f88d0b --- /dev/null +++ b/archivebox/pkgs/abx-plugin-pip/pyproject.toml @@ -0,0 +1,22 @@ +[project] +name = "abx-plugin-pip" +version = "2024.10.24" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic-pkgr>=0.5.4", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-plugin-default-binproviders>=2024.10.24", + "django>=5.0.0", +] + + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_pip = "abx_plugin_pip" diff --git a/archivebox/pkgs/abx-plugin-playwright/README.md b/archivebox/pkgs/abx-plugin-playwright/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_pkg/playwright/__init__.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/__init__.py similarity index 52% rename from archivebox/plugins_pkg/playwright/__init__.py rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/__init__.py index 0f66f42c..6d3ed715 100644 --- a/archivebox/plugins_pkg/playwright/__init__.py +++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/__init__.py @@ -1,30 +1,14 @@ -__package__ = 'plugins_pkg.playwright' -__label__ = 'playwright' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' +__label__ = 'Playwright' __homepage__ = 'https://github.com/microsoft/playwright-python' import abx -@abx.hookimpl -def get_PLUGIN(): - return { - 'playwright': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import PLAYWRIGHT_CONFIG - return { - 'playwright': PLAYWRIGHT_CONFIG + 'PLAYWRIGHT_CONFIG': PLAYWRIGHT_CONFIG } @abx.hookimpl diff --git a/archivebox/plugins_pkg/playwright/binaries.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py similarity index 54% rename from archivebox/plugins_pkg/playwright/binaries.py rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py index 0ef63646..4b77d9d4 100644 --- a/archivebox/plugins_pkg/playwright/binaries.py +++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binaries.py @@ -1,20 +1,18 @@ -__package__ = 'plugins_pkg.playwright' +__package__ = 'abx_plugin_playwright' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinName, BinProvider +from pydantic_pkgr import BinName, BinProvider, Binary -from abx.archivebox.base_binary import BaseBinary, env -from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER +from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER +from abx_plugin_default_binproviders import env from .config import PLAYWRIGHT_CONFIG - - -class PlaywrightBinary(BaseBinary): +class PlaywrightBinary(Binary): name: BinName = PLAYWRIGHT_CONFIG.PLAYWRIGHT_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env] diff --git a/archivebox/plugins_pkg/playwright/binproviders.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py similarity index 79% rename from archivebox/plugins_pkg/playwright/binproviders.py rename to 
archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py index cb8d35cb..972cb11a 100644 --- a/archivebox/plugins_pkg/playwright/binproviders.py +++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/binproviders.py @@ -1,13 +1,15 @@ -__package__ = 'plugins_pkg.playwright' +__package__ = 'abx_plugin_playwright' import os +import shutil import platform from pathlib import Path from typing import List, Optional, Dict, ClassVar -from pydantic import computed_field, Field +from pydantic import Field from pydantic_pkgr import ( BinName, + BinProvider, BinProviderName, BinProviderOverrides, InstallArgs, @@ -18,25 +20,31 @@ from pydantic_pkgr import ( DEFAULT_ENV_PATH, ) -from archivebox.config import CONSTANTS +import abx -from abx.archivebox.base_binary import BaseBinProvider, env +from abx_plugin_default_binproviders import env -from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER from .binaries import PLAYWRIGHT_BINARY - +USER_PLAYWRIGHT_CACHE_DIR: str | None = os.environ.get("PLAYWRIGHT_BROWSERS_PATH", None) MACOS_PLAYWRIGHT_CACHE_DIR: Path = Path("~/Library/Caches/ms-playwright") LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright") +PLAYWRIGHT_CACHE_DIR: Path = Path(USER_PLAYWRIGHT_CACHE_DIR) if USER_PLAYWRIGHT_CACHE_DIR else ( + MACOS_PLAYWRIGHT_CACHE_DIR.expanduser() + if OPERATING_SYSTEM == "darwin" else + LINUX_PLAYWRIGHT_CACHE_DIR.expanduser() +) -class PlaywrightBinProvider(BaseBinProvider): + +class PlaywrightBinProvider(BinProvider): name: BinProviderName = "playwright" INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name - PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}" + PATH: PATHStr = f"{Path('/usr/share/abx') / 'bin'}:{DEFAULT_ENV_PATH}" + playwright_browsers_dir: Path = PLAYWRIGHT_CACHE_DIR playwright_install_args: List[str] = ["install"] packages_handler: BinProviderOverrides = Field(default={ @@ -45,39 +53,20 @@ class PlaywrightBinProvider(BaseBinProvider): _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {} - @computed_field @property def INSTALLER_BIN_ABSPATH(self) -> HostBinPath | None: try: return PLAYWRIGHT_BINARY.load().abspath - except Exception as e: + except Exception: return None - @property - def playwright_browsers_dir(self) -> Path: - # The directory where playwright stores browsers can be overridden with - # the "PLAYWRIGHT_BROWSERS_PATH" environment variable; if it's present - # and a directory, we should use that. See the playwright documentation - # for more details: - # https://playwright.dev/docs/browsers#managing-browser-binaries - dir_path = os.environ.get("PLAYWRIGHT_BROWSERS_PATH") - if dir_path and os.path.isdir(dir_path): - return Path(dir_path) - - # Otherwise return the default path based on the operating system. 
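# The removed property above and the new module-level PLAYWRIGHT_CACHE_DIR implement the same
# precedence: an explicit PLAYWRIGHT_BROWSERS_PATH env var wins, otherwise an OS-specific
# cache dir is used. The same logic as a standalone function (sys.platform is used here in
# place of pydantic_pkgr's OPERATING_SYSTEM constant):
import os
import sys
from pathlib import Path

def playwright_browsers_dir() -> Path:
    override = os.environ.get('PLAYWRIGHT_BROWSERS_PATH')
    if override:
        return Path(override)                                      # explicit override wins
    if sys.platform == 'darwin':
        return Path('~/Library/Caches/ms-playwright').expanduser()
    return Path('~/.cache/ms-playwright').expanduser()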
- return ( - MACOS_PLAYWRIGHT_CACHE_DIR.expanduser() - if OPERATING_SYSTEM == "darwin" else - LINUX_PLAYWRIGHT_CACHE_DIR.expanduser() - ) - def setup(self) -> None: - # update paths from config if they arent the default - from archivebox.config.common import STORAGE_CONFIG - if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR: - self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}" + # update paths from config at runtime + LIB_DIR = abx.pm.hook.get_LIB_DIR() + + self.PATH = f"{LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}" - assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized" + assert shutil.which('pip'), "Pip bin provider not initialized" if self.playwright_browsers_dir: self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True) diff --git a/archivebox/plugins_pkg/playwright/config.py b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/config.py similarity index 59% rename from archivebox/plugins_pkg/playwright/config.py rename to archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/config.py index 23f22efc..0c7c6a50 100644 --- a/archivebox/plugins_pkg/playwright/config.py +++ b/archivebox/pkgs/abx-plugin-playwright/abx_plugin_playwright/config.py @@ -1,7 +1,4 @@ -__package__ = 'playwright' - -from abx.archivebox.base_configset import BaseConfigSet - +from abx_spec_config import BaseConfigSet class PlaywrightConfigs(BaseConfigSet): PLAYWRIGHT_BINARY: str = 'playwright' diff --git a/archivebox/pkgs/abx-plugin-playwright/pyproject.toml b/archivebox/pkgs/abx-plugin-playwright/pyproject.toml new file mode 100644 index 00000000..0ad0d995 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-playwright/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "abx-plugin-playwright" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic>=2.4.2", + "pydantic-pkgr>=0.5.4", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_playwright = "abx_plugin_playwright" diff --git a/archivebox/pkgs/abx-plugin-pocket/README.md b/archivebox/pkgs/abx-plugin-pocket/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/__init__.py b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/__init__.py new file mode 100644 index 00000000..09e5dc8f --- /dev/null +++ b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/__init__.py @@ -0,0 +1,18 @@ +__package__ = 'abx_plugin_pocket' +__label__ = 'Pocket' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import POCKET_CONFIG + + return { + 'POCKET_CONFIG': POCKET_CONFIG + } + +@abx.hookimpl +def ready(): + from .config import POCKET_CONFIG + POCKET_CONFIG.validate() diff --git a/archivebox/plugins_extractor/pocket/config.py b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py similarity index 62% rename from archivebox/plugins_extractor/pocket/config.py rename to archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py index 7866a1f6..31f691b2 100644 --- a/archivebox/plugins_extractor/pocket/config.py +++ b/archivebox/pkgs/abx-plugin-pocket/abx_plugin_pocket/config.py @@ -1,15 +1,12 @@ -__package__ = 'plugins_extractor.pocket' - from typing import Dict - from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config import BaseConfigSet class 
PocketConfig(BaseConfigSet): POCKET_CONSUMER_KEY: str | None = Field(default=None) - POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default=lambda: {}) # {: , ...} + POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default=dict) # {: , ...} POCKET_CONFIG = PocketConfig() diff --git a/archivebox/pkgs/abx-plugin-pocket/pyproject.toml b/archivebox/pkgs/abx-plugin-pocket/pyproject.toml new file mode 100644 index 00000000..999fa098 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-pocket/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-pocket" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "pocket>=0.3.6", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_pocket = "abx_plugin_pocket" diff --git a/archivebox/pkgs/abx-plugin-puppeteer/README.md b/archivebox/pkgs/abx-plugin-puppeteer/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py new file mode 100644 index 00000000..1ee876d6 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/__init__.py @@ -0,0 +1,30 @@ +__package__ = 'abx_plugin_puppeteer' +__label__ = 'Puppeteer' +__homepage__ = 'https://github.com/puppeteer/puppeteer' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import PUPPETEER_CONFIG + + return { + 'PUPPETEER_CONFIG': PUPPETEER_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import PUPPETEER_BINARY + + return { + 'puppeteer': PUPPETEER_BINARY, + } + +@abx.hookimpl +def get_BINPROVIDERS(): + from .binproviders import PUPPETEER_BINPROVIDER + + return { + 'puppeteer': PUPPETEER_BINPROVIDER, + } diff --git a/archivebox/plugins_pkg/puppeteer/binaries.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py similarity index 54% rename from archivebox/plugins_pkg/puppeteer/binaries.py rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py index 7e592bba..8afd484f 100644 --- a/archivebox/plugins_pkg/puppeteer/binaries.py +++ b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binaries.py @@ -1,20 +1,20 @@ -__package__ = 'plugins_pkg.puppeteer' +__package__ = 'abx_plugin_puppeteer' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName +from pydantic_pkgr import BinProvider, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env +from abx_plugin_default_binproviders import env -from plugins_pkg.npm.binproviders import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER +from abx_plugin_npm.binproviders import LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER ###################### Config ########################## -class PuppeteerBinary(BaseBinary): +class PuppeteerBinary(Binary): name: BinName = "puppeteer" binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] diff --git a/archivebox/plugins_pkg/puppeteer/binproviders.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py similarity index 91% rename from archivebox/plugins_pkg/puppeteer/binproviders.py rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py index 2ef0eb7a..e7b697bd 100644 --- a/archivebox/plugins_pkg/puppeteer/binproviders.py +++ 
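One caveat about the token-map fields in these config sets: pydantic treats Field(default=...) as a literal value, so passing dict (or a lambda) there stores the class or function object itself rather than an empty mapping; a fresh per-instance dict is normally spelled with default_factory. A sketch of that form for PocketConfig:

from typing import Dict
from pydantic import Field
from abx_spec_config.base_configset import BaseConfigSet

class PocketConfig(BaseConfigSet):
    POCKET_CONSUMER_KEY: str | None = Field(default=None)
    POCKET_ACCESS_TOKENS: Dict[str, str] = Field(default_factory=dict)  # {<username>: <token>, ...}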
b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/binproviders.py @@ -1,5 +1,3 @@ -__package__ = 'plugins_pkg.puppeteer' - import os import platform from pathlib import Path @@ -7,6 +5,7 @@ from typing import List, Optional, Dict, ClassVar from pydantic import Field from pydantic_pkgr import ( + BinProvider, BinName, BinProviderName, BinProviderOverrides, @@ -15,15 +14,15 @@ from pydantic_pkgr import ( HostBinPath, ) +import abx + from archivebox.config import CONSTANTS from archivebox.config.permissions import ARCHIVEBOX_USER -from abx.archivebox.base_binary import BaseBinProvider - -from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER +from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER -class PuppeteerBinProvider(BaseBinProvider): +class PuppeteerBinProvider(BinProvider): name: BinProviderName = "puppeteer" INSTALLER_BIN: BinName = "npx" @@ -42,10 +41,12 @@ class PuppeteerBinProvider(BaseBinProvider): _browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {} def setup(self) -> None: - # update paths from config - from archivebox.config.common import STORAGE_CONFIG - self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers' - self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin') + # update paths from config, don't do this lazily because we dont want to import archivebox.config.common at import-time + # we want to avoid depending on archivebox from abx code if at all possible + LIB_DIR = abx.pm.hook.get_LIB_DIR() + BIN_DIR = abx.pm.hook.get_BIN_DIR() + self.puppeteer_browsers_dir = LIB_DIR / 'browsers' + self.PATH = str(BIN_DIR) assert SYS_NPM_BINPROVIDER.INSTALLER_BIN_ABSPATH, "NPM bin provider not initialized" diff --git a/archivebox/plugins_pkg/puppeteer/config.py b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py similarity index 79% rename from archivebox/plugins_pkg/puppeteer/config.py rename to archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py index b76d0779..f09e7062 100644 --- a/archivebox/plugins_pkg/puppeteer/config.py +++ b/archivebox/pkgs/abx-plugin-puppeteer/abx_plugin_puppeteer/config.py @@ -1,7 +1,7 @@ -__package__ = 'plugins_pkg.puppeteer' +__package__ = 'abx_plugin_puppeteer' -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet ###################### Config ########################## diff --git a/archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml b/archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml new file mode 100644 index 00000000..2633b481 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-puppeteer/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "abx-plugin-puppeteer" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_puppeteer = "abx_plugin_puppeteer" diff --git a/archivebox/pkgs/abx-plugin-readability/README.md b/archivebox/pkgs/abx-plugin-readability/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/__init__.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/__init__.py new file mode 100644 index 00000000..cb7d35af --- /dev/null +++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/__init__.py @@ -0,0 +1,30 @@ +__package__ 
= 'abx_plugin_readability' +__label__ = 'Readability' +__homepage__ = 'https://github.com/ArchiveBox/readability-extractor' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import READABILITY_CONFIG + + return { + 'READABILITY_CONFIG': READABILITY_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import READABILITY_BINARY + + return { + 'readability': READABILITY_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import READABILITY_EXTRACTOR + + return { + 'readability': READABILITY_EXTRACTOR, + } diff --git a/archivebox/plugins_extractor/readability/binaries.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py similarity index 69% rename from archivebox/plugins_extractor/readability/binaries.py rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py index 43343924..65ecf57c 100644 --- a/archivebox/plugins_extractor/readability/binaries.py +++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/binaries.py @@ -1,20 +1,19 @@ -__package__ = 'plugins_extractor.readability' +__package__ = 'abx_plugin_readability' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinaryOverrides, BinName +from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName -from abx.archivebox.base_binary import BaseBinary, env - -from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER +from abx_plugin_default_binproviders import env +from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER from .config import READABILITY_CONFIG READABILITY_PACKAGE_NAME = 'github:ArchiveBox/readability-extractor' -class ReadabilityBinary(BaseBinary): +class ReadabilityBinary(Binary): name: BinName = READABILITY_CONFIG.READABILITY_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] diff --git a/archivebox/plugins_extractor/readability/config.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/config.py similarity index 83% rename from archivebox/plugins_extractor/readability/config.py rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/config.py index 8066d56c..726295fe 100644 --- a/archivebox/plugins_extractor/readability/config.py +++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/config.py @@ -1,8 +1,6 @@ -__package__ = 'plugins_extractor.readability' - from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG diff --git a/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py new file mode 100644 index 00000000..64d712ed --- /dev/null +++ b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/extractors.py @@ -0,0 +1,19 @@ +# __package__ = 'abx_plugin_readability' + +# from pathlib import Path + +# from pydantic_pkgr import BinName + + +# from .binaries import READABILITY_BINARY + + +# class ReadabilityExtractor(BaseExtractor): +# name: str = 'readability' +# binary: BinName = READABILITY_BINARY.name + +# def get_output_path(self, snapshot) -> Path: +# return Path(snapshot.link_dir) / 'readability' / 'content.html' + + +# READABILITY_EXTRACTOR = ReadabilityExtractor() diff --git a/archivebox/extractors/readability.py 
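Hook implementations like get_BINARIES and get_EXTRACTORS above each return a small dict, one per plugin. A consumer-side sketch, assuming standard pluggy semantics where calling a hook yields a list of every plugin's return value (abx's own wrappers may merge these further, as the readwise get_CONFIG usage below implies):

import abx

extractors = {}
for plugin_result in abx.pm.hook.get_EXTRACTORS():   # one dict per implementing plugin
    extractors.update(plugin_result)
print(sorted(extractors))                            # e.g. ['git', 'mercury', 'readability', ...]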
b/archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py similarity index 100% rename from archivebox/extractors/readability.py rename to archivebox/pkgs/abx-plugin-readability/abx_plugin_readability/readability.py diff --git a/archivebox/pkgs/abx-plugin-readability/pyproject.toml b/archivebox/pkgs/abx-plugin-readability/pyproject.toml new file mode 100644 index 00000000..59a2db64 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-readability/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-plugin-readability" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_readability = "abx_plugin_readability" diff --git a/archivebox/pkgs/abx-plugin-readwise/README.md b/archivebox/pkgs/abx-plugin-readwise/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py b/archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py new file mode 100644 index 00000000..ea31cd14 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-readwise/abx_plugin_readwise.py @@ -0,0 +1,35 @@ +__package__ = 'abx_plugin_readwise_extractor' +__id__ = 'abx_plugin_readwise_extractor' +__label__ = 'Readwise API' +__version__ = '2024.10.27' +__author__ = 'ArchiveBox' +__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise' +__dependencies__ = [] + +import abx + +from typing import Dict +from pathlib import Path + +from pydantic import Field + +from abx_spec_config.base_configset import BaseConfigSet + +SOURCES_DIR = abx.pm.hook.get_CONFIG().SOURCES_DIR + + +class ReadwiseConfig(BaseConfigSet): + READWISE_DB_PATH: Path = Field(default=SOURCES_DIR / "readwise_reader_api.db") + READWISE_READER_TOKENS: Dict[str, str] = Field(default=lambda: {}) # {: , ...} + + +@abx.hookimpl +def get_CONFIG(): + return { + __id__: ReadwiseConfig() + } + +@abx.hookimpl +def ready(): + READWISE_CONFIG = abx.pm.hook.get_CONFIG()[__id__] + READWISE_CONFIG.validate() diff --git a/archivebox/pkgs/abx-plugin-readwise/pyproject.toml b/archivebox/pkgs/abx-plugin-readwise/pyproject.toml new file mode 100644 index 00000000..c85d489f --- /dev/null +++ b/archivebox/pkgs/abx-plugin-readwise/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-readwise" +version = "2024.10.28" +description = "Readwise API Extractor" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_readwise = "abx_plugin_readwise" + diff --git a/archivebox/pkgs/abx-plugin-ripgrep-search/README.md b/archivebox/pkgs/abx-plugin-ripgrep-search/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py new file mode 100644 index 00000000..91347523 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/__init__.py @@ -0,0 +1,31 @@ +__package__ = 'abx_plugin_ripgrep_search' +__label__ = 'Ripgrep Search' +__homepage__ = 'https://github.com/BurntSushi/ripgrep' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import 
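# The readwise module above reads its own config back out of the plugin manager, using
# abx.pm.hook.get_CONFIG() with both attribute access (.SOURCES_DIR) and item access
# ([__id__]) -- which implies abx merges every plugin's get_CONFIG() dict into a single
# benedict-style mapping. A usage sketch under that assumption:
import abx

merged_config = abx.pm.hook.get_CONFIG()
readwise_config = merged_config['abx_plugin_readwise_extractor']  # keyed by the plugin's __id__
print(merged_config.SOURCES_DIR, readwise_config.READWISE_DB_PATH)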
RIPGREP_CONFIG + + return { + 'RIPGREP_CONFIG': RIPGREP_CONFIG + } + + +@abx.hookimpl +def get_BINARIES(): + from .binaries import RIPGREP_BINARY + + return { + 'ripgrep': RIPGREP_BINARY + } + + +@abx.hookimpl +def get_SEARCHBACKENDS(): + from .searchbackend import RIPGREP_SEARCH_BACKEND + + return { + 'ripgrep': RIPGREP_SEARCH_BACKEND, + } diff --git a/archivebox/plugins_search/ripgrep/binaries.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py similarity index 65% rename from archivebox/plugins_search/ripgrep/binaries.py rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py index 710a1ef0..ef9217ad 100644 --- a/archivebox/plugins_search/ripgrep/binaries.py +++ b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/binaries.py @@ -1,17 +1,17 @@ -__package__ = 'plugins_search.ripgrep' +__package__ = 'abx_plugin_ripgrep_search' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinaryOverrides, BinName +from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx_plugin_default_binproviders import apt, brew, env from .config import RIPGREP_CONFIG -class RipgrepBinary(BaseBinary): +class RipgrepBinary(Binary): name: BinName = RIPGREP_CONFIG.RIPGREP_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/archivebox/plugins_search/ripgrep/config.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py similarity index 89% rename from archivebox/plugins_search/ripgrep/config.py rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py index 726c21e8..e0fd3b28 100644 --- a/archivebox/plugins_search/ripgrep/config.py +++ b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/config.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_search.ripgrep' +__package__ = 'abx_plugin_ripgrep_search' from pathlib import Path from typing import List from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config import CONSTANTS from archivebox.config.common import SEARCH_BACKEND_CONFIG diff --git a/archivebox/plugins_search/ripgrep/searchbackend.py b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py similarity index 93% rename from archivebox/plugins_search/ripgrep/searchbackend.py rename to archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py index 3c30af85..ed3965ba 100644 --- a/archivebox/plugins_search/ripgrep/searchbackend.py +++ b/archivebox/pkgs/abx-plugin-ripgrep-search/abx_plugin_ripgrep_search/searchbackend.py @@ -1,11 +1,11 @@ -__package__ = 'plugins_search.ripgrep' +__package__ = 'abx_plugin_ripgrep_search' import re import subprocess from typing import List, Iterable -from abx.archivebox.base_searchbackend import BaseSearchBackend +from abx_spec_searchbackend import BaseSearchBackend from .binaries import RIPGREP_BINARY from .config import RIPGREP_CONFIG diff --git a/archivebox/pkgs/abx-plugin-ripgrep-search/pyproject.toml b/archivebox/pkgs/abx-plugin-ripgrep-search/pyproject.toml new file mode 100644 index 00000000..67245c48 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-ripgrep-search/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-ripgrep-search" +version = "2024.10.28" +description = "Add your 
description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-searchbackend>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_ripgrep_search = "abx_plugin_ripgrep_search" diff --git a/archivebox/pkgs/abx-plugin-singlefile/README.md b/archivebox/pkgs/abx-plugin-singlefile/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py new file mode 100644 index 00000000..be6dcd02 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/__init__.py @@ -0,0 +1,35 @@ +__package__ = 'abx_plugin_singlefile' +__label__ = 'Singlefile' +__homepage__ = 'https://github.com/gildas-lormeau/singlefile' + +import abx + + +@abx.hookimpl +def get_CONFIG(): + from .config import SINGLEFILE_CONFIG + + return { + 'SINGLEFILE_CONFIG': SINGLEFILE_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import SINGLEFILE_BINARY + + return { + 'singlefile': SINGLEFILE_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import SINGLEFILE_EXTRACTOR + + return { + 'singlefile': SINGLEFILE_EXTRACTOR, + } + +@abx.hookimpl +def get_INSTALLED_APPS(): + # needed to load ./models.py + return [__package__] diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py new file mode 100644 index 00000000..d928d0fd --- /dev/null +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/actors.py @@ -0,0 +1,27 @@ +__package__ = 'abx_plugin_singlefile' + +from typing import ClassVar +from django.db.models import QuerySet +from django.utils.functional import classproperty + +from actors.actor import ActorType + +from .models import SinglefileResult + + +class SinglefileActor(ActorType[SinglefileResult]): + CLAIM_ORDER: ClassVar[str] = 'created_at DESC' + CLAIM_WHERE: ClassVar[str] = 'status = "queued" AND extractor = "favicon"' + CLAIM_SET: ClassVar[str] = 'status = "started"' + + @classproperty + def QUERYSET(cls) -> QuerySet: + return SinglefileResult.objects.filter(status='queued') + + def tick(self, obj: SinglefileResult): + print(f'[grey53]{self}.tick({obj.abid or obj.id}, status={obj.status}) remaining:[/grey53]', self.get_queue().count()) + updated = SinglefileResult.objects.filter(id=obj.id, status='started').update(status='success') == 1 + if not updated: + raise Exception(f'Failed to update {obj.abid or obj.id}, interrupted by another actor writing to the same object') + obj.refresh_from_db() + obj.save() diff --git a/archivebox/plugins_extractor/singlefile/binaries.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py similarity index 84% rename from archivebox/plugins_extractor/singlefile/binaries.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py index 0c8a1bab..7af784a3 100644 --- a/archivebox/plugins_extractor/singlefile/binaries.py +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/binaries.py @@ -1,13 +1,10 @@ -__package__ = 'plugins_extractor.singlefile' - from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, bin_abspath +from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath -from 
abx.archivebox.base_binary import BaseBinary, env - -from plugins_pkg.npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER +from abx_plugin_default_binproviders import env +from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER from .config import SINGLEFILE_CONFIG @@ -16,7 +13,7 @@ SINGLEFILE_MIN_VERSION = '1.1.54' SINGLEFILE_MAX_VERSION = '1.1.60' -class SinglefileBinary(BaseBinary): +class SinglefileBinary(Binary): name: BinName = SINGLEFILE_CONFIG.SINGLEFILE_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_NPM_BINPROVIDER, SYS_NPM_BINPROVIDER, env] diff --git a/archivebox/plugins_extractor/singlefile/config.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/config.py similarity index 88% rename from archivebox/plugins_extractor/singlefile/config.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/config.py index 7d27031e..0d2164ba 100644 --- a/archivebox/plugins_extractor/singlefile/config.py +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/config.py @@ -1,11 +1,9 @@ -__package__ = 'plugins_extractor.singlefile' - from pathlib import Path from typing import List, Optional from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py new file mode 100644 index 00000000..07b674ac --- /dev/null +++ b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/extractors.py @@ -0,0 +1,18 @@ +__package__ = 'abx_plugin_singlefile' + +# from pathlib import Path + +# from pydantic_pkgr import BinName + +# from .binaries import SINGLEFILE_BINARY + + +# class SinglefileExtractor(BaseExtractor): +# name: str = 'singlefile' +# binary: BinName = SINGLEFILE_BINARY.name + +# def get_output_path(self, snapshot) -> Path: +# return Path(snapshot.link_dir) / 'singlefile.html' + + +# SINGLEFILE_EXTRACTOR = SinglefileExtractor() diff --git a/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/singlefile/models.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py similarity index 100% rename from archivebox/plugins_extractor/singlefile/models.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/models.py diff --git a/archivebox/extractors/singlefile.py b/archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py similarity index 100% rename from archivebox/extractors/singlefile.py rename to archivebox/pkgs/abx-plugin-singlefile/abx_plugin_singlefile/singlefile.py diff --git a/archivebox/pkgs/abx-plugin-singlefile/pyproject.toml b/archivebox/pkgs/abx-plugin-singlefile/pyproject.toml new file mode 100644 index 00000000..7cecd40a --- /dev/null +++ b/archivebox/pkgs/abx-plugin-singlefile/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "abx-plugin-singlefile" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend 
= "hatchling.build" + +[project.entry-points.abx] +abx_plugin_singlefile = "abx_plugin_singlefile" diff --git a/archivebox/pkgs/abx-plugin-sonic-search/README.md b/archivebox/pkgs/abx-plugin-sonic-search/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_search/sonic/__init__.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py similarity index 53% rename from archivebox/plugins_search/sonic/__init__.py rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py index a899679b..1a92a8d2 100644 --- a/archivebox/plugins_search/sonic/__init__.py +++ b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/__init__.py @@ -1,32 +1,16 @@ -__package__ = 'plugins_search.sonic' -__label__ = 'sonic' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' +__package__ = 'abx_plugin_sonic_search' +__label__ = 'Sonic Search' __homepage__ = 'https://github.com/valeriansaliou/sonic' -__dependencies__ = [] import abx -@abx.hookimpl -def get_PLUGIN(): - return { - 'sonic': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import SONIC_CONFIG return { - 'sonic': SONIC_CONFIG + 'SONIC_CONFIG': SONIC_CONFIG } diff --git a/archivebox/plugins_search/sonic/binaries.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py similarity index 80% rename from archivebox/plugins_search/sonic/binaries.py rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py index eab987c5..2e8fb536 100644 --- a/archivebox/plugins_search/sonic/binaries.py +++ b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/binaries.py @@ -1,16 +1,16 @@ -__package__ = 'plugins_search.sonic' +__package__ = 'abx_plugin_sonic_search' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinaryOverrides, BinName +from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, brew +from abx_plugin_default_binproviders import brew, env from .config import SONIC_CONFIG -class SonicBinary(BaseBinary): +class SonicBinary(Binary): name: BinName = SONIC_CONFIG.SONIC_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [brew, env] # TODO: add cargo diff --git a/archivebox/plugins_search/sonic/config.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py similarity index 93% rename from archivebox/plugins_search/sonic/config.py rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py index d54ed568..97cc7b3a 100644 --- a/archivebox/plugins_search/sonic/config.py +++ b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/config.py @@ -1,10 +1,10 @@ -__package__ = 'plugins_search.sonic' +__package__ = 'abx_plugin_sonic_search' import sys from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import SEARCH_BACKEND_CONFIG diff --git a/archivebox/plugins_search/sonic/searchbackend.py b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py similarity index 97% rename from archivebox/plugins_search/sonic/searchbackend.py rename to archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py index 
1662e5b2..a63a0132 100644 --- a/archivebox/plugins_search/sonic/searchbackend.py +++ b/archivebox/pkgs/abx-plugin-sonic-search/abx_plugin_sonic_search/searchbackend.py @@ -2,7 +2,7 @@ __package__ = 'plugins_search.sonic' from typing import List, Generator, cast -from abx.archivebox.base_searchbackend import BaseSearchBackend +from abx_spec_searchbackend import BaseSearchBackend from .config import SONIC_CONFIG, SONIC_LIB diff --git a/archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml b/archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml new file mode 100644 index 00000000..b6551b52 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-sonic-search/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "abx-plugin-sonic-search" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-spec-searchbackend>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_sonic_search = "abx_plugin_sonic_search" diff --git a/archivebox/pkgs/abx-plugin-sqlitefts-search/README.md b/archivebox/pkgs/abx-plugin-sqlitefts-search/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py new file mode 100644 index 00000000..5d5ed6de --- /dev/null +++ b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/__init__.py @@ -0,0 +1,21 @@ +__package__ = 'abx_plugin_sqlitefts_search' +__label__ = 'SQLiteFTS Search' + +import abx + +@abx.hookimpl +def get_CONFIG(): + from .config import SQLITEFTS_CONFIG + + return { + 'SQLITEFTS_CONFIG': SQLITEFTS_CONFIG + } + + +@abx.hookimpl +def get_SEARCHBACKENDS(): + from .searchbackend import SQLITEFTS_SEARCH_BACKEND + + return { + 'sqlitefts': SQLITEFTS_SEARCH_BACKEND, + } diff --git a/archivebox/plugins_search/sqlitefts/config.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py similarity index 96% rename from archivebox/plugins_search/sqlitefts/config.py rename to archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py index 5690dc6c..789ff114 100644 --- a/archivebox/plugins_search/sqlitefts/config.py +++ b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/config.py @@ -1,4 +1,4 @@ -__package__ = 'plugins_search.sqlitefts' +__package__ = 'abx_plugin_sqlitefts_search' import sys import sqlite3 @@ -8,7 +8,7 @@ from django.core.exceptions import ImproperlyConfigured from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import SEARCH_BACKEND_CONFIG diff --git a/archivebox/plugins_search/sqlitefts/searchbackend.py b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py similarity index 98% rename from archivebox/plugins_search/sqlitefts/searchbackend.py rename to archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py index 630bdd4c..2ae7c9cf 100644 --- a/archivebox/plugins_search/sqlitefts/searchbackend.py +++ b/archivebox/pkgs/abx-plugin-sqlitefts-search/abx_plugin_sqlitefts_search/searchbackend.py @@ -1,10 +1,10 @@ -__package__ = 'plugins_search.sqlitefts' 
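# ----------------------------------------------------------------------------------
# Illustrative sketch (not part of the diff above): a minimal third-party search
# backend written against the new abx_spec_searchbackend interface, assuming the
# same index()/flush()/search() staticmethod signatures implemented by the ripgrep,
# sonic, and sqlitefts backends in this changeset. All names here are hypothetical.
from typing import List, Iterable

from abx_spec_searchbackend import BaseSearchBackend

_INDEX: dict = {}   # {snapshot_id: 'all indexed text for that snapshot'}

class InMemorySearchBackend(BaseSearchBackend):
    name: str = 'inmemory'

    @staticmethod
    def index(snapshot_id: str, texts: List[str]) -> None:
        _INDEX[snapshot_id] = ' '.join(texts)

    @staticmethod
    def flush(snapshot_ids: Iterable[str]) -> None:
        for snapshot_id in snapshot_ids:
            _INDEX.pop(snapshot_id, None)

    @staticmethod
    def search(text: str) -> List[str]:
        return [sid for sid, body in _INDEX.items() if text in body]

INMEMORY_SEARCH_BACKEND = InMemorySearchBackend()
# a plugin would then expose it via get_SEARCHBACKENDS() -> {'inmemory': INMEMORY_SEARCH_BACKEND}
# ----------------------------------------------------------------------------------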
+__package__ = 'abx_plugin_sqlitefts_search' import codecs import sqlite3 from typing import List, Iterable -from abx.archivebox.base_searchbackend import BaseSearchBackend +from abx_spec_searchbackend import BaseSearchBackend from .config import SQLITEFTS_CONFIG diff --git a/archivebox/pkgs/abx-plugin-sqlitefts-search/pyproject.toml b/archivebox/pkgs/abx-plugin-sqlitefts-search/pyproject.toml new file mode 100644 index 00000000..abc6181a --- /dev/null +++ b/archivebox/pkgs/abx-plugin-sqlitefts-search/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-sqlitefts-search" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-searchbackend>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_sqlitefts_search = "abx_plugin_sqlitefts_search" diff --git a/archivebox/pkgs/abx-plugin-title/README.md b/archivebox/pkgs/abx-plugin-title/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py new file mode 100644 index 00000000..d3e5cac5 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/__init__.py @@ -0,0 +1,9 @@ +import abx + +# @abx.hookimpl +# def get_CONFIG(): +# from .config import TITLE_EXTRACTOR_CONFIG + +# return { +# 'title_extractor': TITLE_EXTRACTOR_CONFIG +# } diff --git a/archivebox/extractors/title.py b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py similarity index 97% rename from archivebox/extractors/title.py rename to archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py index ceefb699..a8ef52cf 100644 --- a/archivebox/extractors/title.py +++ b/archivebox/pkgs/abx-plugin-title/abx_plugin_title/extractor.py @@ -11,8 +11,8 @@ from archivebox.misc.util import ( htmldecode, dedupe, ) -from archivebox.plugins_extractor.curl.config import CURL_CONFIG -from archivebox.plugins_extractor.curl.binaries import CURL_BINARY +from abx_plugin_curl.config import CURL_CONFIG +from abx_plugin_curl.binaries import CURL_BINARY from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError from ..logging_util import TimedProgress diff --git a/archivebox/pkgs/abx-plugin-title/pyproject.toml b/archivebox/pkgs/abx-plugin-title/pyproject.toml new file mode 100644 index 00000000..a9737b3a --- /dev/null +++ b/archivebox/pkgs/abx-plugin-title/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-title" +version = "2024.10.27" +description = "Title Extractor" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-plugin-curl>=2024.10.28", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_title = "abx_plugin_title" diff --git a/archivebox/pkgs/abx-plugin-wget/README.md b/archivebox/pkgs/abx-plugin-wget/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/__init__.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/__init__.py new file mode 100644 index 00000000..a32987ee --- /dev/null +++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/__init__.py @@ -0,0 +1,35 @@ +__package__ = 'abx_plugin_wget' +__label__ = 'WGET' + +import abx + + +@abx.hookimpl +def
get_CONFIG(): + from .config import WGET_CONFIG + + return { + 'WGET_CONFIG': WGET_CONFIG + } + +@abx.hookimpl +def get_BINARIES(): + from .binaries import WGET_BINARY + + return { + 'wget': WGET_BINARY, + } + +@abx.hookimpl +def get_EXTRACTORS(): + from .extractors import WGET_EXTRACTOR, WARC_EXTRACTOR + + return { + 'wget': WGET_EXTRACTOR, + 'warc': WARC_EXTRACTOR, + } + +@abx.hookimpl +def ready(): + from .config import WGET_CONFIG + WGET_CONFIG.validate() diff --git a/archivebox/plugins_extractor/wget/binaries.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py similarity index 57% rename from archivebox/plugins_extractor/wget/binaries.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py index 6198beac..39cbe111 100644 --- a/archivebox/plugins_extractor/wget/binaries.py +++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/binaries.py @@ -1,17 +1,17 @@ -__package__ = 'plugins_extractor.wget' +__package__ = 'abx_plugin_wget' from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName +from pydantic_pkgr import BinProvider, BinName, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew +from abx_plugin_default_binproviders import apt, brew, env from .config import WGET_CONFIG -class WgetBinary(BaseBinary): +class WgetBinary(Binary): name: BinName = WGET_CONFIG.WGET_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/archivebox/plugins_extractor/wget/config.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/config.py similarity index 96% rename from archivebox/plugins_extractor/wget/config.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/config.py index 12edf672..1dfd1b07 100644 --- a/archivebox/plugins_extractor/wget/config.py +++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/config.py @@ -1,12 +1,10 @@ -__package__ = 'plugins_extractor.wget' - import subprocess from typing import List, Optional from pathlib import Path from pydantic import Field -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG, STORAGE_CONFIG from archivebox.misc.logging import STDERR diff --git a/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py new file mode 100644 index 00000000..4d4d0243 --- /dev/null +++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/extractors.py @@ -0,0 +1,35 @@ +__package__ = 'abx_plugin_wget' + +# from pathlib import Path + +# from pydantic_pkgr import BinName + +# from .binaries import WGET_BINARY +# from .wget_util import wget_output_path + +# class WgetExtractor(BaseExtractor): +# name: ExtractorName = 'wget' +# binary: BinName = WGET_BINARY.name + +# def get_output_path(self, snapshot) -> Path | None: +# wget_index_path = wget_output_path(snapshot.as_link()) +# if wget_index_path: +# return Path(wget_index_path) +# return None + +# WGET_EXTRACTOR = WgetExtractor() + + +# class WarcExtractor(BaseExtractor): +# name: ExtractorName = 'warc' +# binary: BinName = WGET_BINARY.name + +# def get_output_path(self, snapshot) -> Path | None: +# warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz')) +# if warc_files: +# return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0] +# return None + + +# WARC_EXTRACTOR = WarcExtractor() + diff --git a/archivebox/extractors/wget.py 
b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py similarity index 97% rename from archivebox/extractors/wget.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py index 416e797e..caaaeaf6 100644 --- a/archivebox/extractors/wget.py +++ b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget.py @@ -1,4 +1,4 @@ -__package__ = 'archivebox.extractors' +__package__ = 'abx_plugin_wget' import re import os @@ -17,10 +17,11 @@ from archivebox.misc.util import ( urldecode, dedupe, ) -from archivebox.plugins_extractor.wget.config import WGET_CONFIG -from archivebox.plugins_extractor.wget.binaries import WGET_BINARY -from ..logging_util import TimedProgress -from ..index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError +from .config import WGET_CONFIG +from .binaries import WGET_BINARY + +from archivebox.logging_util import TimedProgress +from archivebox.index.schema import Link, ArchiveResult, ArchiveOutput, ArchiveError def get_output_path(): diff --git a/archivebox/plugins_extractor/wget/wget_util.py b/archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget_util.py similarity index 100% rename from archivebox/plugins_extractor/wget/wget_util.py rename to archivebox/pkgs/abx-plugin-wget/abx_plugin_wget/wget_util.py diff --git a/archivebox/pkgs/abx-plugin-wget/pyproject.toml b/archivebox/pkgs/abx-plugin-wget/pyproject.toml new file mode 100644 index 00000000..d401e52f --- /dev/null +++ b/archivebox/pkgs/abx-plugin-wget/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-plugin-wget" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_wget = "abx_plugin_wget" diff --git a/archivebox/pkgs/abx-plugin-ytdlp/README.md b/archivebox/pkgs/abx-plugin-ytdlp/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/plugins_extractor/ytdlp/__init__.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py similarity index 53% rename from archivebox/plugins_extractor/ytdlp/__init__.py rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py index 1dc9ef99..5b1d9968 100644 --- a/archivebox/plugins_extractor/ytdlp/__init__.py +++ b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/__init__.py @@ -1,30 +1,15 @@ -__package__ = 'plugins_extractor.ytdlp' +__package__ = 'abx_plugin_ytdlp' __label__ = 'YT-DLP' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' __homepage__ = 'https://github.com/yt-dlp/yt-dlp' import abx - -@abx.hookimpl -def get_PLUGIN(): - return { - 'ytdlp': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - } - } - @abx.hookimpl def get_CONFIG(): from .config import YTDLP_CONFIG return { - 'ytdlp': YTDLP_CONFIG + 'YTDLP_CONFIG': YTDLP_CONFIG } @abx.hookimpl diff --git a/archivebox/plugins_extractor/ytdlp/binaries.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py similarity index 77% rename from archivebox/plugins_extractor/ytdlp/binaries.py rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py index 730de2dc..69239515 100644 --- a/archivebox/plugins_extractor/ytdlp/binaries.py +++ b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/binaries.py @@ -1,26 +1,25 @@ -__package__ = 'plugins_extractor.ytdlp'
+__package__ = 'abx_plugin_ytdlp' import subprocess from typing import List from pydantic import InstanceOf -from pydantic_pkgr import BinProvider, BinName, BinaryOverrides +from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, Binary -from abx.archivebox.base_binary import BaseBinary, env, apt, brew - -from plugins_pkg.pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER +from abx_plugin_default_binproviders import apt, brew, env +from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER from .config import YTDLP_CONFIG -class YtdlpBinary(BaseBinary): +class YtdlpBinary(Binary): name: BinName = YTDLP_CONFIG.YTDLP_BINARY binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env] YTDLP_BINARY = YtdlpBinary() -class FfmpegBinary(BaseBinary): +class FfmpegBinary(Binary): name: BinName = 'ffmpeg' binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env] diff --git a/archivebox/plugins_extractor/ytdlp/config.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py similarity index 97% rename from archivebox/plugins_extractor/ytdlp/config.py rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py index 0082df3d..b36d19d1 100644 --- a/archivebox/plugins_extractor/ytdlp/config.py +++ b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/config.py @@ -4,7 +4,7 @@ from typing import List from pydantic import Field, AliasChoices -from abx.archivebox.base_configset import BaseConfigSet +from abx_spec_config.base_configset import BaseConfigSet from archivebox.config.common import ARCHIVING_CONFIG from archivebox.misc.logging import STDERR diff --git a/archivebox/extractors/media.py b/archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py similarity index 100% rename from archivebox/extractors/media.py rename to archivebox/pkgs/abx-plugin-ytdlp/abx_plugin_ytdlp/media.py diff --git a/archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml b/archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml new file mode 100644 index 00000000..b45626bd --- /dev/null +++ b/archivebox/pkgs/abx-plugin-ytdlp/pyproject.toml @@ -0,0 +1,19 @@ +[project] +name = "abx-plugin-ytdlp" +version = "2024.10.28" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_plugin_ytdlp = "abx_plugin_ytdlp" diff --git a/archivebox/pkgs/abx-spec-archivebox/README.md b/archivebox/pkgs/abx-spec-archivebox/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py new file mode 100644 index 00000000..ab591c96 --- /dev/null +++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/__init__.py @@ -0,0 +1,28 @@ +__package__ = 'abx_spec_archivebox' +__order__ = 400 + +# from .effects import * +# from .events import * +# from .reads import * +# from .writes import * +# from .states import * + +from typing import cast + +import abx +from abx_spec_config import ConfigPluginSpec +from abx_spec_pydantic_pkgr import PydanticPkgrPluginSpec +from abx_spec_django import DjangoPluginSpec +from abx_spec_searchbackend import SearchBackendPluginSpec + 
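# ----------------------------------------------------------------------------------
# Illustrative sketch (not part of the diff above): how a caller uses the composed
# plugin spec defined just below. get_CONFIGS() and get_FLAT_CONFIG() come from
# ConfigPluginSpec later in this changeset; the plugin ids and config keys shown in
# the comments are hypothetical examples.
import abx

def dump_all_plugin_config():
    configs = abx.pm.hook.get_CONFIGS()     # e.g. {'abx_plugin_wget': WgetConfig(), ...}
    for plugin_id, configset in configs.items():
        print(plugin_id, configset)
    flat = abx.pm.hook.get_FLAT_CONFIG()    # merged view, e.g. {'WGET_BINARY': 'wget', ...}
    print(flat.get('WGET_BINARY'))
# ----------------------------------------------------------------------------------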
+class ArchiveBoxPluginSpec(ConfigPluginSpec, PydanticPkgrPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec): + """ + ArchiveBox plugins can use any of the hooks from the Config, PydanticPkgr, Django, and SearchBackend plugin specs. + """ + pass + +PLUGIN_SPEC = ArchiveBoxPluginSpec + + +TypedPluginManager = abx.ABXPluginManager[ArchiveBoxPluginSpec] +pm = cast(TypedPluginManager, abx.pm) diff --git a/archivebox/abx/archivebox/effects.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/effects.py similarity index 100% rename from archivebox/abx/archivebox/effects.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/effects.py diff --git a/archivebox/abx/archivebox/events.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/events.py similarity index 100% rename from archivebox/abx/archivebox/events.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/events.py diff --git a/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/reads.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/reads.py new file mode 100644 index 00000000..30d6667d --- /dev/null +++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/reads.py @@ -0,0 +1,33 @@ +__package__ = 'abx_spec_archivebox' + + +from benedict import benedict + + +def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None): + """Get all the relevant config for the given scope, in correct precedence order""" + + from django.conf import settings + default_config: benedict = defaults or settings.CONFIG + + snapshot = snapshot or (archiveresult and archiveresult.snapshot) + crawl = crawl or (snapshot and snapshot.crawl) + seed = seed or (crawl and crawl.seed) + persona = persona or (crawl and crawl.persona) + + persona_config = persona.config if persona else {} + seed_config = seed.config if seed else {} + crawl_config = crawl.config if crawl else {} + snapshot_config = snapshot.config if snapshot else {} + archiveresult_config = archiveresult.config if archiveresult else {} + extra_config = extra_config or {} + + return benedict({ + **default_config, # defaults / config file / environment variables + **persona_config, # lowest precedence + **seed_config, + **crawl_config, + **snapshot_config, + **archiveresult_config, + **extra_config, # highest precedence + }) diff --git a/archivebox/abx/archivebox/states.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py similarity index 61% rename from archivebox/abx/archivebox/states.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py index 15d06f61..05284f37 100644 --- a/archivebox/abx/archivebox/states.py +++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/states.py @@ -20,11 +20,130 @@ from django.urls import reverse_lazy from pathlib import Path +# Glossary: +# - startup: when a new process is spawned +# - shutdown: when a process is exiting +# - start: at the beginning of some python code block +# - end: at the end of some python code block +# - queue: a django queryset of objects of a single type that are waiting to be processed +# - actor: a long-running daemon process that wakes up and processes a single object from a queue at a time +# - plugin: a python package that defines some hookimpls based on hookspecs exposed by ABX +# - object: an instance of a django model that represents a single row in the database +# ORCHESTRATOR: +# An orchestrator is a single long-running daemon process that manages spawning and
killing actors for different queues of objects. +# The orchestrator first starts when archivebox starts, and it stops when archivebox is killed. +# Only one orchestrator process can be running per collection per machine. +# An orchestrator is aware of all of the ActorTypes that are defined in the system, and their associated queues. +# When started, the orchestrator runs a single runloop that continues until the archivebox process is killed. +# On each loop, the orchestrator: +# - loops through each ActorType defined in the system: +# - fetches the queue of objects pending for that ActorType by calling ActorType.get_queue() +# - checks how many actors are currently running for that ActorType by calling current_actors = ActorType.get_running_actors() +# - determines how many new actors are needed and what their launch kwargs should be to process the objects in each queue +# actors_to_spawn = ActorType.get_actors_to_spawn(queue, current_actors) +# - e.g. if there are 4 ArchiveResult objects queued all with the same persona + extractor, it should spawn a single actor to process all of them, if there are 4000 it should spawn ~5 actors +# - if there are 4 ArchiveResult objects queued with different personas + extractors, it should spawn a single actor for each persona + extractor +# - if there are *many* objects to process, it can spawn more actors of the same type up to ActorType.MAX_ACTORS to speed things up +# - spawns the new actors needed as subprocesses via ActorType.spawn_actors(actors_to_spawn, block=False, double_fork=False) (see the sketch after this list) +# - checks for ANY objects in the DB that have a retry_at time set but where no ActorType has them in their queue, and raises a warning that they are orphaned and will never be processed +# - sleeps for 0.1s before repeating the loop, to reduce the CPU load +# The orchestrator does not manage killing actors, actors are expected to exit on their own when idle. +# ABX defines the following hookspecs for plugins to hook into the orchestrator lifecycle: +# - abx.pm.hook.on_orchestrator_startup(all_actor_types) +# - abx.pm.hook.on_orchestrator_tick_started(all_actor_types, all_queues, all_running_actors) +# - abx.pm.hook.on_orchestrator_idle(all_actor_types) # only run when there are no queues with pending objects to process +# - abx.pm.hook.on_orchestrator_shutdown(all_actor_types) +# OBJECT: +# e.g. Snapshot, Crawl, ArchiveResult +# An object is a single row in a database table, defined by a django model. +# An object has a finite set of states that it can be in. +# An object has a status field that holds the object's current state e.g. status="queued". +# An object has a retry_at field that holds a timestamp for when it should next be checked by an actor eventloop. +# Each type of object has a single tick() method defined that handles all of its state transitions. +# When an object's retry_at time has passed, the actor managing that type of object will spawn an actor and call tick(object) to move it to its next state. +# ABX defines the following hookspecs for plugins to hook into object lifecycle: # use these for in-memory operations, don't use these for db on_create/on_update/on_delete logic, separate hooks are available on write operations below +# - abx.pm.hook.on_<Model>_init(object) # when object is initialized in-memory, don't put any slow code here as it runs on every object returned from DB queries! only for setting default values, ._cache_attrs, etc.
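# ----------------------------------------------------------------------------------
# Sketch of the orchestrator runloop described above, assembled only from the
# ActorType methods named in these comments (get_queue, get_running_actors,
# get_actors_to_spawn, spawn_actors); a minimal illustration, not the real code.
import time

def orchestrator_runloop(all_actor_types):
    while True:
        for ActorType in all_actor_types:
            queue = ActorType.get_queue()                        # objects pending for this actor type
            current_actors = ActorType.get_running_actors()      # actors already working this queue
            actors_to_spawn = ActorType.get_actors_to_spawn(queue, current_actors)
            ActorType.spawn_actors(actors_to_spawn, block=False, double_fork=False)
        time.sleep(0.1)   # sleep between scheduling passes to reduce CPU load
# ----------------------------------------------------------------------------------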
+# - abx.pm.hook.on_<Model>_clean(object) # when object's form fields are validated but before it is to be saved to the DB, put any checks/validations on field values here +# - abx.pm.hook.on_<Model>_save(object) # when object is being saved to the DB, put any code here that should run right before super().save() + +# ACTORS: +# An actor is a long-running daemon process that runs a loop to process a single object at a time from a queue it defines (e.g. ActorType.queue=Snapshot.objects.filter(status='queued', retry_at__lte=time.now())). +# An actor at runtime is an instance of an ActorType class + some launch kwargs that it's passed at startup (e.g. persona, extractor, etc.). +# Actors are started lazily by the orchestrator only when their ActorType.queue indicates there are pending objects to process. +# ActorTypes should define ActorType.get_queue(), ActorType.get_actors_to_spawn(), ActorType.get_running_actors(), and ActorType.spawn_actors() methods exposed to the orchestrator. +# On startup, an actor can initialize shared resources it needs to perform its work, and keep a reference in memory to them. (e.g. launch chrome in the background, set up an API client, etc.) +# On each loop, the actor gets a single object to process from the top of the queue, and runs ActorType.tick(object). +# The actor should have a hardcoded ActorType.MAX_TICK_TIME, and should enforce it by killing the tick() method if it runs too long. +# Before calling tick(), an actor should bump the object.retry_at time by MAX_TICK_TIME to prevent other actors from picking it up while the current actor is still processing it. +# The actor blocks waiting for tick(obj) to finish executing, then the loop repeats and it gets the next object to call tick(object) on. +# If a tick(obj) method raises an exception, the actor should catch it and log it, then move on to the next object in the queue. +# If there are no objects left in the queue, the actor should exit. +# On exit, an actor should release any shared resources it initialized on startup and clean up after itself. +# On startup an actor should fire abx.pm.hook.on_actor_startup(object) and on exit it should fire abx.pm.hook.on_actor_exit(object) (both synchronous hooks that can be used by plugins to register any startup/cleanup code). +# An ActorType defines the following hookspecs for plugins to hook into its behavior: +# - abx.pm.hook.on_actor_startup(actor, queue) +# - abx.pm.hook.on_actor_tick_start(actor, object) +# - abx.pm.hook.on_actor_tick_end(actor, object) +# - abx.pm.hook.on_actor_tick_exception(actor, object, exception) +# - abx.pm.hook.on_actor_shutdown(actor) + +# TICK: +# A tick() method is a method defined on an ActorType, passed a single object to process and perform a single state transition on. +# A tick() method does NOT need to lock the object it's operating on, the actor will bump the object's retry_at += MAX_TICK_TIME before handing it off to tick(). +# A tick() method does NOT open a DB transaction for its entire duration of execution, instead it should do all its writes in one atomic operation using a compare-and-swap .filter(status=previous_state).update(status=next_state) (optimistic concurrency control). +# A tick() method does NOT return any values, it either succeeds and returns None, or fails and raises an exception to be handled by the actor runloop (sketched below). +# A tick() method does NOT need to enforce its own MAX_TICK_TIME / any timeouts, the actor runloop code should enforce that.
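# ----------------------------------------------------------------------------------
# Sketch of the actor runloop contract described above. Only the claim/tick
# semantics from these comments are assumed; the helper names are hypothetical,
# and MAX_TICK_TIME is assumed to be in seconds.
from datetime import timedelta
from django.utils import timezone

def actor_runloop(actor):
    while True:
        obj = actor.get_queue().first()      # next object whose retry_at has passed
        if obj is None:
            break                            # empty queue -> actor exits on its own
        # bump retry_at by MAX_TICK_TIME so other actors skip this object mid-tick
        type(obj).objects.filter(pk=obj.pk).update(
            retry_at=timezone.now() + timedelta(seconds=actor.MAX_TICK_TIME),
        )
        try:
            actor.tick(obj)                  # performs exactly one state transition
        except Exception as err:
            print(f'{actor}.tick({obj}) failed: {err}')   # log, then move on to the next object
# ----------------------------------------------------------------------------------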
+# A tick() should NOT call other tick() methods directly, and it should not spawn orchestrator or actor processes. +# A tick() should set its object.retry_at time to a value farther in the future and return early if it wants to skip execution due to hitting a ratelimit or transient error. +# A tick() can: +# - read from any other objects, filesystem, or external APIs (e.g. check if snapshot_dir/screenshot.png exists) +# - perform any checks necessary, branch, and determine which transition it should perform to reach the next state +# - execute a single transition_from_abx_to_xyz(object) method to perform the transition to the next state it decided on + +# TRANSITION: +# A transition_from_abx_to_xyz(object) method is a function defined on an ActorType, passed a single object by a tick() method to perform a defined transition on. +# A transition_from_abx_to_xyz() method does NOT need to lock the object it's operating on or open any db transactions. +# A transition should not have any branching logic, it should only execute the given transition that it defines + any side effects. +# A transition should be idempotent, if two transitions run at once on the same object it should only perform one transition and the other should fail +# A transition should be atomic, if it is interrupted it should leave the object in a consistent state +# A transition's main body should: +# - perform a SINGLE write() to the underlying object using a compare_and_swap .filter(status=last_state).update(status=next_state) to move it to its next state +# - update the object's retry_at time to a new value, or set it to None if it's in a final state & should not be checked again +# A transition can also trigger side effects at the end of its execution: +# - update the retry_at time on *other* objects (so that they are rechecked by their own actor on the next tick) (ONLY retry_at, do not update any other fields) +# - filesystem operations (e.g. moving a directory to a new location) +# - external API calls (e.g. uploading to s3, firing a webhook, writing to a logfile, etc.) +# - DO NOT use side effects to directly mutate other objects' state or trigger other state transitions +# ABX defines the following hookspecs for plugins to hook into transition behavior: +# - abx.pm.hook.on_transition_<Model>_from_abx_to_xyz_start(object) +# - abx.pm.hook.on_transition_<Model>_from_abx_to_xyz_end(object) + +# READ: +# A read() method is a function defined for a given ActorType that performs a single read from the DB and/or other read models like django cache, filesystem, in-memory caches, etc. +# A read() method should accept either an instance/pk/uuid/abid or some filter_kwargs, and return a benedict/TypedDict or pydantic model containing bare values as the result. + +# WRITE: +# A write() method is a function defined for a given ActorType that performs a single atomic db write to update the DB, django cache, filesystem, in-memory caches, etc. for that object. +# A write() method does NOT need to lock the object it's operating on or open any db transactions, it should just perform a single compare-and-swap .filter(status=last_state).update(status=next_state) operation (sketched below). +# A write() method does NOT need to enforce any timeouts or ratelimits, the tick() method should do that. +# A write() method should NOT have any branching logic or side effects like spawning other processes.
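# ----------------------------------------------------------------------------------
# Sketch of the compare-and-swap write described above, expressed as a Django
# .filter(...).update(...) (optimistic concurrency control). The Snapshot model and
# the queued->started transition are illustrative; only the status/retry_at fields
# from the comments above are assumed.
from django.utils import timezone
from core.models import Snapshot   # illustrative import path

def transition_snapshot_from_queued_to_started(snapshot):
    updated = Snapshot.objects.filter(
        id=snapshot.id, status='queued',     # compare: only if still in the prior state
    ).update(
        status='started',                    # swap: single atomic move to the next state
        retry_at=timezone.now(),             # recheck on the next actor tick
    )
    if updated != 1:
        raise Exception(f'{snapshot} was already claimed by another actor')
# ----------------------------------------------------------------------------------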
+# ABX defines the following hookspecs for plugins to hook into write behavior: +# - abx.pm.hook.on_<Model>_created(object) +# - abx.pm.hook.on_<Model>_updated(object) +# - abx.pm.hook.on_<Model>_deleted(object) + +# SIDEEFFECT: +# A sideeffect is a helper function defined in an app to be used by one or more tick() methods to perform a side effect that isn't a simple DB write or read. +# A sideeffect can spawn other processes, make 3rd-party API calls, write to the filesystem, etc. e.g. subprocess.Popen('wget https://example.com') +# A sideeffect should execute quickly and return early; it should try not to block for slow RPCs, subprocess jobs, or network operations. +# For slow or long-running sideeffects, spawn a separate background process and return immediately. Update the object's retry_at time and state as needed so that a future tick() will check for any expected output from the background job. +# ABX defines the following hookspecs for plugins to hook into sideeffect behavior: +# - abx.pm.hook.on_sideeffect_xyz_started(object) +# - abx.pm.hook.on_sideeffect_xyz_succeeded(object) +# - abx.pm.hook.on_sideeffect_xyz_failed(object) @@ -99,6 +218,7 @@ def transition_snapshot_to_started(snapshot, config, cwd): fields_to_update = {'status': 'started', 'retry_at': retry_at, 'retries': retries, 'start_ts': time.now(), 'end_ts': None} snapshot = abx.archivebox.writes.update_snapshot(filter_kwargs=snapshot_to_update, update_kwargs=fields_to_update) + # trigger side effects on state transition (these just emit an event to a separate queue that's then processed by a huey worker) cleanup_snapshot_dir(snapshot, config, cwd) create_snapshot_pending_archiveresults(snapshot, config, cwd) update_snapshot_index_json(archiveresult, config, cwd) @@ -114,6 +234,7 @@ def transition_snapshot_to_sealed(snapshot, config, cwd): fields_to_update = {'status': 'sealed', 'retry_at': None, 'end_ts': time.now()} snapshot = abx.archivebox.writes.update_snapshot(filter_kwargs=snapshot_to_update, update_kwargs=fields_to_update) + # side effects: cleanup_snapshot_dir(snapshot, config, cwd) update_snapshot_index_json(snapshot, config, cwd) update_snapshot_index_html(snapshot, config, cwd) @@ -225,7 +346,7 @@ def transition_archiveresult_to_started(archiveresult, config, cwd): fields_to_update = {'status': 'started', 'retry_at': retry_at, 'retries': retries, 'start_ts': time.now(), 'output': None, 'error': None} archiveresult = abx.archivebox.writes.update_archiveresult(filter=archiveresult_to_update, update=fields_to_update) - + # side effects: with TimedProgress(): try: from .extractors import WARC_EXTRACTOR @@ -334,7 +455,7 @@ def on_crawl_created(crawl): @abx.hookimpl def on_snapshot_created(snapshot, config): - create_archiveresults_pending_from_snapshot(snapshot, config) + create_snapshot_pending_archiveresults(snapshot, config) # events @abx.hookimpl @@ -361,7 +482,7 @@ def scheduler_runloop(): try: abx.archivebox.events.on_crawl_schedule_tick(scheduled_crawl) except Exception as e: - abx.archivebox.events.on_crawl_schedule_failure(timezone.now(), machine=Machine.objects.get_current_machine(), error=e, schedule=scheduled_crawl) + abx.archivebox.events.on_crawl_schedule_tick_failure(timezone.now(), machine=Machine.objects.get_current_machine(), error=e, schedule=scheduled_crawl) # abx.archivebox.events.on_scheduler_tick_end(timezone.now(), machine=Machine.objects.get_current_machine(), tasks=scheduled_tasks_due) time.sleep(1) @@ -420,7 +541,7 @@ def create_root_snapshot(crawl):
abx.archivebox.writes.update_crawl_stats(started_at=timezone.now()) -def create_archiveresults_pending_from_snapshot(snapshot, config): +def create_snapshot_pending_archiveresults(snapshot, config): config = get_scope_config( # defaults=settings.CONFIG_FROM_DEFAULTS, # configfile=settings.CONFIG_FROM_FILE, diff --git a/archivebox/abx/archivebox/writes.py b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/writes.py similarity index 98% rename from archivebox/abx/archivebox/writes.py rename to archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/writes.py index 0c4566b4..977543d2 100644 --- a/archivebox/abx/archivebox/writes.py +++ b/archivebox/pkgs/abx-spec-archivebox/abx_spec_archivebox/writes.py @@ -8,7 +8,6 @@ from benedict import benedict from django.conf import settings import abx -from .. import pm @abx.hookimpl @@ -88,7 +87,7 @@ def create_root_snapshot_from_seed(crawl): def create_archiveresults_pending_from_snapshot(snapshot, config): config = get_scope_config( # defaults=settings.CONFIG_FROM_DEFAULTS, - # configfile=settings.CONFIG_FROM_FILE, + # collection=settings.CONFIG_FROM_FILE, # environment=settings.CONFIG_FROM_ENVIRONMENT, persona=archiveresult.snapshot.crawl.persona, seed=archiveresult.snapshot.crawl.seed, diff --git a/archivebox/pkgs/abx-spec-archivebox/pyproject.toml b/archivebox/pkgs/abx-spec-archivebox/pyproject.toml new file mode 100644 index 00000000..349698a7 --- /dev/null +++ b/archivebox/pkgs/abx-spec-archivebox/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-spec-archivebox" +version = "0.1.0" +description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem." +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "django>=5.1.1,<6.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_archivebox = "abx_spec_archivebox" diff --git a/archivebox/pkgs/abx-spec-config/README.md b/archivebox/pkgs/abx-spec-config/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py new file mode 100644 index 00000000..6aeedb71 --- /dev/null +++ b/archivebox/pkgs/abx-spec-config/abx_spec_config/__init__.py @@ -0,0 +1,151 @@ +__order__ = 100 + +import os +from pathlib import Path +from typing import Dict, Any, cast + +from benedict import benedict + + +import abx + +from .base_configset import BaseConfigSet, ConfigKeyStr + + +class ConfigPluginSpec: + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_collection_config_path(self) -> Path: + return Path(os.getcwd()) / "ArchiveBox.conf" + + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_system_config_path(self) -> Path: + return Path('~/.config/abx/abx.conf').expanduser() + + + @abx.hookspec + @abx.hookimpl + def get_CONFIG(self) -> Dict[abx.PluginId, BaseConfigSet]: + """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}""" + return { + # override this in your plugin to return your plugin's config, e.g. 
+ # 'ytdlp': YtdlpConfig(...), + } + + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_CONFIGS(self) -> Dict[abx.PluginId, BaseConfigSet]: + """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}""" + return abx.as_dict(pm.hook.get_CONFIG()) + + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_FLAT_CONFIG(self) -> Dict[ConfigKeyStr, Any]: + """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}""" + return benedict({ + key: value + for configset in pm.hook.get_CONFIGS().values() + for key, value in benedict(configset).items() + }) + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_SCOPE_CONFIG(self, extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> Dict[ConfigKeyStr, Any]: + """Get the config as it applies to you right now, based on the current context""" + return benedict({ + **pm.hook.get_default_config(default=default), + # **pm.hook.get_machine_config(machine), + **pm.hook.get_environment_config(environment=environment), + **pm.hook.get_collection_config(collection=collection), + **pm.hook.get_user_config(user=user), + **pm.hook.get_crawl_config(crawl=crawl), + **pm.hook.get_snapshot_config(snapshot=snapshot), + **pm.hook.get_archiveresult_config(archiveresult=archiveresult), + # **pm.hook.get_request_config(request=request), + **(extra or {}), + }) + + # @abx.hookspec(firstresult=True) + # @abx.hookimpl + # def get_request_config(self, request) -> dict: + # session = getattr(request, 'session', None) + # return getattr(session, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_archiveresult_config(self, archiveresult) -> Dict[ConfigKeyStr, Any]: + return getattr(archiveresult, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_snapshot_config(self, snapshot) -> Dict[ConfigKeyStr, Any]: + return getattr(snapshot, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_crawl_config(self, crawl) -> Dict[ConfigKeyStr, Any]: + return getattr(crawl, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_user_config(self, user=None) -> Dict[ConfigKeyStr, Any]: + return getattr(user, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_collection_config(self, collection=...) -> Dict[ConfigKeyStr, Any]: + # ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file + # {} = empty dict, override to ignore the collection config + return benedict({ + key: value + for configset in pm.hook.get_CONFIGS().values() + for key, value in configset.from_collection().items() + }) if collection == ... else collection + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_environment_config(self, environment=...) -> Dict[ConfigKeyStr, Any]: + # ... = ellipsis, means automatically get the environment config from the active environment variables + # {} = empty dict, override to ignore the environment config + return benedict({ + key: value + for configset in pm.hook.get_CONFIGS().values() + for key, value in configset.from_environment().items() + }) if environment == ... else environment + + # @abx.hookspec(firstresult=True) + # @abx.hookimpl + # def get_machine_config(self, machine=...) -> dict: + # # ... 
= ellipsis, means automatically get the machine config from the currently executing machine + # # {} = empty dict, override to ignore the machine config + # if machine == ...: + # machine = Machine.objects.get_current() + # return getattr(machine, 'config', None) or {} + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_default_config(self, default=...) -> Dict[ConfigKeyStr, Any]: + # ... = ellipsis, means automatically get the default config from the configset schema defaults + # {} = empty dict, override to ignore the default config + return benedict({ + key: value + for configset in pm.hook.get_CONFIGS().values() + for key, value in configset.from_defaults().items() + }) if default == ... else default + + + # TODO: add read_config_file(), write_config_file() hooks + + +PLUGIN_SPEC = ConfigPluginSpec + + +class ExpectedPluginSpec(ConfigPluginSpec): + pass + +TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec] +pm = cast(TypedPluginManager, abx.pm) diff --git a/archivebox/abx/archivebox/base_configset.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py similarity index 72% rename from archivebox/abx/archivebox/base_configset.py rename to archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py index 3a6695a1..434db331 100644 --- a/archivebox/abx/archivebox/base_configset.py +++ b/archivebox/pkgs/abx-spec-config/abx_spec_config/base_configset.py @@ -1,36 +1,32 @@ -__package__ = 'abx.archivebox' +__package__ = 'abx_spec_config' import os import sys import re from pathlib import Path from typing import Type, Tuple, Callable, ClassVar, Dict, Any +from typing_extensions import Annotated import toml from rich import print from benedict import benedict -from pydantic import model_validator, TypeAdapter, AliasChoices +from pydantic import model_validator, TypeAdapter, AliasChoices, AfterValidator from pydantic_settings import BaseSettings, SettingsConfigDict, PydanticBaseSettingsSource from pydantic_settings.sources import TomlConfigSettingsSource -from pydantic_pkgr import func_takes_args_or_kwargs - +import abx from .
import toml_util -PACKAGE_DIR = Path(__file__).resolve().parent.parent -DATA_DIR = Path(os.getcwd()).resolve() - -ARCHIVEBOX_CONFIG_FILE = DATA_DIR / "ArchiveBox.conf" -ARCHIVEBOX_CONFIG_FILE_BAK = ARCHIVEBOX_CONFIG_FILE.parent / ".ArchiveBox.conf.bak" - AUTOFIXES_HEADER = "[AUTOFIXES]" AUTOFIXES_SUBHEADER = "# The following config was added automatically to fix problems detected at startup:" _ALREADY_WARNED_ABOUT_UPDATED_CONFIG = set() +ConfigKeyStr = Annotated[str, AfterValidator(lambda x: x if x.isidentifier() and x.isupper() and not x.startswith('_') else (_ for _ in ()).throw(ValueError(f'Invalid config key: {x}')))] # AfterValidator must return the validated value, not a bool + class FlatTomlConfigSettingsSource(TomlConfigSettingsSource): """ @@ -98,9 +94,10 @@ class BaseConfigSet(BaseSettings): revalidate_instances="subclass-instances", ) - load_from_defaults: ClassVar[bool] = True - load_from_configfile: ClassVar[bool] = True - load_from_environment: ClassVar[bool] = True + load_from_defaults: ClassVar[bool] = True # read from schema defaults + load_from_system: ClassVar[bool] = True # read from ~/.config/abx/abx.conf + load_from_collection: ClassVar[bool] = True # read from ./ArchiveBox.conf + load_from_environment: ClassVar[bool] = True # read from environment variables @classmethod def settings_customise_sources( @@ -115,47 +112,41 @@ class BaseConfigSet(BaseSettings): # import ipdb; ipdb.set_trace() - precedence_order = {} + default_configs = [init_settings] if cls.load_from_defaults else [] + system_configs = [] + collection_configs = [] + environment_configs = [env_settings] if cls.load_from_environment else [] - # if ArchiveBox.conf does not exist yet, return defaults -> env order - if not ARCHIVEBOX_CONFIG_FILE.is_file(): - precedence_order = { - 'defaults': init_settings, - 'environment': env_settings, - } + # load system config from ~/.config/abx/abx.conf + SYSTEM_CONFIG_FILE = abx.pm.hook.get_system_config_path() + if cls.load_from_system and os.path.isfile(SYSTEM_CONFIG_FILE): + try: + system_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=SYSTEM_CONFIG_FILE)] + except Exception as err: + if err.__class__.__name__ == "TOMLDecodeError": + convert_ini_to_toml(SYSTEM_CONFIG_FILE) + system_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=SYSTEM_CONFIG_FILE)] + else: + raise + + COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path() + if cls.load_from_collection and os.path.isfile(COLLECTION_CONFIG_FILE): + try: + collection_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=COLLECTION_CONFIG_FILE)] + except Exception as err: + if err.__class__.__name__ == "TOMLDecodeError": + convert_ini_to_toml(COLLECTION_CONFIG_FILE) + collection_configs = [FlatTomlConfigSettingsSource(settings_cls, toml_file=COLLECTION_CONFIG_FILE)] + else: + raise - # if ArchiveBox.conf exists and is in TOML format, return default -> TOML -> env order - try: - precedence_order = precedence_order or { - 'defaults': init_settings,
'configfile': FlatTomlConfigSettingsSource(settings_cls, toml_file=ARCHIVEBOX_CONFIG_FILE), - 'environment': env_settings, - } - - if not cls.load_from_environment: - precedence_order.pop('environment') - if not cls.load_from_configfile: - precedence_order.pop('configfile') - if not cls.load_from_defaults: - precedence_order.pop('defaults') - - return tuple(precedence_order.values()) + precedence_order = [ + *default_configs, + *system_configs, + *collection_configs, + *environment_configs, + ] + return tuple(precedence_order) @model_validator(mode="after") def fill_defaults(self): @@ -173,7 +164,7 @@ class BaseConfigSet(BaseSettings): """Manual validation method, to be called from plugin/__init__.py:get_CONFIG()""" pass - def get_default_value(self, key): + def get_default_value(self, key: ConfigKeyStr): """Get the default value for a given config key""" field = self.model_fields[key] value = getattr(self, key) @@ -202,7 +193,9 @@ class BaseConfigSet(BaseSettings): Example acceptable use case: user config says SEARCH_BACKEND_ENGINE=sonic but sonic_client pip library is not installed so we cannot use it. SEARCH_BACKEND_CONFIG.update_in_place(SEARCH_BACKEND_ENGINE='ripgrep') can be used to reset it back to ripgrep so we can continue. """ - from archivebox.misc.toml_util import CustomTOMLEncoder + + COLLECTION_CONFIG_FILE = abx.pm.hook.get_collection_config_path() + # SYSTEM_CONFIG_FILE = abx.pm.hook.get_system_config_path() # silence warnings if they've already been shown once if all(key in _ALREADY_WARNED_ABOUT_UPDATED_CONFIG for key in kwargs.keys()): @@ -222,10 +215,10 @@ class BaseConfigSet(BaseSettings): # if persist=True, write config changes to data/ArchiveBox.conf [AUTOFIXES] section try: - if persist and ARCHIVEBOX_CONFIG_FILE.is_file(): - autofixes_to_add = benedict(kwargs).to_toml(encoder=CustomTOMLEncoder()) + if persist and COLLECTION_CONFIG_FILE.is_file(): + autofixes_to_add = benedict(kwargs).to_toml(encoder=toml_util.CustomTOMLEncoder()) - existing_config = ARCHIVEBOX_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip() + existing_config = COLLECTION_CONFIG_FILE.read_text().split(AUTOFIXES_HEADER, 1)[0].strip() if AUTOFIXES_HEADER in existing_config: existing_autofixes = existing_config.split(AUTOFIXES_HEADER, 1)[-1].strip().replace(AUTOFIXES_SUBHEADER, '').replace(AUTOFIXES_HEADER, '').strip() else: @@ -238,7 +231,7 @@ class BaseConfigSet(BaseSettings): existing_autofixes, autofixes_to_add, ] if line.strip()).strip() + '\n' - ARCHIVEBOX_CONFIG_FILE.write_text(new_config) + COLLECTION_CONFIG_FILE.write_text(new_config) except Exception: pass self.__init__() @@ -248,7 +241,7 @@ class BaseConfigSet(BaseSettings): return self @property - def aliases(self) -> Dict[str, str]: + def aliases(self) -> Dict[ConfigKeyStr, ConfigKeyStr]: alias_map = {} for key, field in self.model_fields.items(): alias_map[key] = key @@ -274,36 +267,36 @@ class BaseConfigSet(BaseSettings): return re.sub('([A-Z]+)', r'_\1', class_name).upper().strip('_') - def from_defaults(self) -> Dict[str, Any]: + def from_defaults(self) -> Dict[ConfigKeyStr, Any]: """Get the dictionary of {key: value} config loaded from the default values""" class OnlyDefaultsConfig(self.__class__): load_from_defaults = True - load_from_configfile = False + load_from_collection = False load_from_environment = False return benedict(OnlyDefaultsConfig().model_dump(exclude_unset=False, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) - def from_configfile(self) -> Dict[str, Any]: - """Get the dictionary 
of {key: value} config loaded from the configfile ArchiveBox.conf""" + def from_collection(self) -> Dict[ConfigKeyStr, Any]: + """Get the dictionary of {key: value} config loaded from the collection ArchiveBox.conf""" class OnlyConfigFileConfig(self.__class__): load_from_defaults = False - load_from_configfile = True + load_from_collection = True load_from_environment = False return benedict(OnlyConfigFileConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) - def from_environment(self) -> Dict[str, Any]: + def from_environment(self) -> Dict[ConfigKeyStr, Any]: """Get the dictionary of {key: value} config loaded from the environment variables""" class OnlyEnvironmentConfig(self.__class__): load_from_defaults = False - load_from_configfile = False + load_from_collection = False load_from_environment = True return benedict(OnlyEnvironmentConfig().model_dump(exclude_unset=True, exclude_defaults=False, exclude=set(self.model_computed_fields.keys()))) - def from_computed(self) -> Dict[str, Any]: + def from_computed(self) -> Dict[ConfigKeyStr, Any]: """Get the dictionary of {key: value} config loaded from the computed fields""" return benedict(self.model_dump(include=set(self.model_computed_fields.keys()))) - def to_toml_dict(self, defaults=False) -> Dict[str, Any]: + def to_toml_dict(self, defaults=False) -> Dict[ConfigKeyStr, Any]: """Get the current config as a TOML-ready dict""" config_dict = {} for key, value in benedict(self).items(): @@ -323,10 +316,24 @@ class BaseConfigSet(BaseSettings): return toml.dumps(toml_dict, encoder=CustomTOMLEncoder()) - def as_legacy_config_schema(self) -> Dict[str, Any]: - # shim for backwards compatibility with old config schema style - model_values = self.model_dump() - return benedict({ - key: {'type': field.annotation, 'default': model_values[key]} - for key, field in self.model_fields.items() - }) + + +def func_takes_args_or_kwargs(lambda_func: Callable[..., Any]) -> bool: + """returns True if a lambda func takes args/kwargs of any kind, otherwise false if it's pure/argless""" + code = lambda_func.__code__ + has_args = code.co_argcount > 0 + has_varargs = code.co_flags & 0x04 != 0 + has_varkw = code.co_flags & 0x08 != 0 + return has_args or has_varargs or has_varkw + + + + +def convert_ini_to_toml(ini_file: Path): + """Convert an INI file to a TOML file, saving the original to .ORIGINALNAME.bak""" + + bak_path = ini_file.parent / f'.{ini_file.name}.bak' + original_ini = ini_file.read_text() + bak_path.write_text(original_ini) + new_toml = toml_util.convert(original_ini) + ini_file.write_text(new_toml) diff --git a/archivebox/abx/archivebox/toml_util.py b/archivebox/pkgs/abx-spec-config/abx_spec_config/toml_util.py similarity index 100% rename from archivebox/abx/archivebox/toml_util.py rename to archivebox/pkgs/abx-spec-config/abx_spec_config/toml_util.py diff --git a/archivebox/pkgs/abx-spec-config/pyproject.toml b/archivebox/pkgs/abx-spec-config/pyproject.toml new file mode 100644 index 00000000..aa2f6eb4 --- /dev/null +++ b/archivebox/pkgs/abx-spec-config/pyproject.toml @@ -0,0 +1,20 @@ +[project] +name = "abx-spec-config" +version = "0.1.0" +description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem." 
+readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "python-benedict>=0.34.0", + "pydantic>=2.9.2", + "pydantic-settings>=2.6.0", + "rich>=13.9.3", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_config = "abx_spec_config" diff --git a/archivebox/pkgs/abx-spec-django/README.md b/archivebox/pkgs/abx-spec-django/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-spec-django/abx_spec_django.py b/archivebox/pkgs/abx-spec-django/abx_spec_django.py new file mode 100644 index 00000000..562dad72 --- /dev/null +++ b/archivebox/pkgs/abx-spec-django/abx_spec_django.py @@ -0,0 +1,118 @@ +__order__ = 300 + +import abx +from typing import List, Dict, Any, cast + +########################################################################################### + +class DjangoPluginSpec: + @abx.hookspec + def get_INSTALLED_APPS() -> List[str]: + return ['abx_spec_django'] + + @abx.hookspec + def get_TEMPLATE_DIRS() -> List[str]: + return [] # e.g. ['your_plugin_type/plugin_name/templates'] + + + @abx.hookspec + def get_STATICFILES_DIRS() -> List[str]: + return [] # e.g. ['your_plugin_type/plugin_name/static'] + + # @abx.hookspec + # def register_STATICFILES_DIRS(STATICFILES_DIRS): + # """Mutate STATICFILES_DIRS in place to add your static dirs in a specific position""" + # # e.g. STATICFILES_DIRS.insert(0, 'your_plugin_type/plugin_name/static') + # pass + + + @abx.hookspec + def get_MIDDLEWARES() -> List[str]: + return [] # e.g. ['your_plugin_type.plugin_name.middleware.YourMiddleware'] + + # @abx.hookspec + # def register_MIDDLEWARE(MIDDLEWARE): + # """Mutate MIDDLEWARE in place to add your middleware in a specific position""" + # # e.g. MIDDLEWARE.insert(0, 'your_plugin_type.plugin_name.middleware.YourMiddleware') + # pass + + + @abx.hookspec + def get_AUTHENTICATION_BACKENDS() -> List[str]: + return [] # e.g. ['django_auth_ldap.backend.LDAPBackend'] + + # @abx.hookspec + # def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS): + # """Mutate AUTHENTICATION_BACKENDS in place to add your auth backends in a specific position""" + # # e.g. AUTHENTICATION_BACKENDS.insert(0, 'your_plugin_type.plugin_name.backend.YourBackend') + # pass + + @abx.hookspec + def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME) -> Dict[str, Dict[str, Any]]: + return {} # e.g. {'some_queue_name': {'filename': 'some_queue_name.sqlite3', 'store_none': True, 'results': True, ...}} + + # @abx.hookspec + # def register_DJANGO_HUEY(DJANGO_HUEY): + # """Mutate DJANGO_HUEY in place to add your huey queues in a specific position""" + # # e.g. DJANGO_HUEY['queues']['some_queue_name']['some_setting'] = 'some_value' + # pass + + + @abx.hookspec + def get_ADMIN_DATA_VIEWS_URLS() -> List[str]: + return [] + + # @abx.hookspec + # def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS): + # """Mutate ADMIN_DATA_VIEWS in place to add your admin data views in a specific position""" + # # e.g. ADMIN_DATA_VIEWS['URLS'].insert(0, 'your_plugin_type/plugin_name/admin_data_views.py') + # pass + + + # @abx.hookspec + # def register_settings(settings): + # """Mutate settings in place to add your settings / modify existing settings""" + # # settings.SOME_KEY = 'some_value' + # pass + + + ########################################################################################### + + @abx.hookspec + def get_urlpatterns() -> List[str]: + return [] # e.g. 
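[path('your_plugin_type/plugin_name/url.py', your_view)]
+
+    # A minimal sketch of a plugin module implementing these hookspecs
+    # (plugin name, app label, and view are hypothetical, for illustration only):
+    #
+    #     import abx
+    #
+    #     @abx.hookimpl
+    #     def get_INSTALLED_APPS():
+    #         return ['abx_plugin_example']
+    #
+    #     @abx.hookimpl
+    #     def get_urlpatterns():
+    #         from django.urls import path
+    #         from abx_plugin_example.views import example_view
+    #         return [path('plugins/example/', example_view)]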
+
+    # @abx.hookspec
+    # def register_urlpatterns(urlpatterns):
+    #     """Mutate urlpatterns in place to add your urlpatterns in a specific position"""
+    #     # e.g. urlpatterns.insert(0, path('your_plugin_type/plugin_name/url.py', your_view))
+    #     pass
+
+    ###########################################################################################
+
+
+
+    @abx.hookspec
+    def register_admin(admin_site) -> None:
+        """Register django admin views/models with the main django admin site instance"""
+        # e.g. admin_site.register(your_model, your_admin_class)
+        pass
+
+
+    ###########################################################################################
+
+
+    @abx.hookspec
+    def ready() -> None:
+        """Called when each Django app's AppConfig.ready() is triggered"""
+        # e.g. abx.pm.hook.get_CONFIG().ytdlp.validate()
+        pass
+
+
+PLUGIN_SPEC = DjangoPluginSpec
+
+class ExpectedPluginSpec(DjangoPluginSpec):
+    pass
+
+TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec]
+pm = cast(TypedPluginManager, abx.pm)
diff --git a/archivebox/pkgs/abx-spec-django/pyproject.toml b/archivebox/pkgs/abx-spec-django/pyproject.toml
new file mode 100644
index 00000000..09ed31ff
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-django/pyproject.toml
@@ -0,0 +1,17 @@
+[project]
+name = "abx-spec-django"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "abx>=0.1.0",
+    "django>=5.1.1,<6.0",
+]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[project.entry-points.abx]
+abx_spec_django = "abx_spec_django"
diff --git a/archivebox/pkgs/abx-spec-extractor/README.md b/archivebox/pkgs/abx-spec-extractor/README.md
new file mode 100644
index 00000000..e69de29b
diff --git a/archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py b/archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py
new file mode 100644
index 00000000..74659467
--- /dev/null
+++ b/archivebox/pkgs/abx-spec-extractor/abx_spec_extractor.py
@@ -0,0 +1,211 @@
+import os
+
+from typing import Optional, List, Annotated, Tuple
+from pathlib import Path
+
+from pydantic import AfterValidator
+from pydantic_pkgr import BinName
+
+
+import abx
+
+
+def assert_no_empty_args(args: List[str]) -> List[str]:
+    assert all(len(arg) for arg in args)
+    return args
+
+# NOTE: AfterValidator funcs must return the validated value (a bool return would replace the value)
+def assert_is_identifier(s: str) -> str:
+    assert s.isidentifier()
+    return s
+
+def assert_is_self_method(s: str) -> str:
+    assert s.startswith('self.')
+    return s
+
+ExtractorName = Annotated[str, AfterValidator(assert_is_identifier)]
+
+HandlerFuncStr = Annotated[str, AfterValidator(assert_is_self_method)]
+CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)]
+
+
+@abx.hookspec
+@abx.hookimpl
+def get_EXTRACTORS():
+    return []
+
+@abx.hookspec
+@abx.hookimpl
+def extract(uri: str, config: dict | None=None):
+    return {}
+
+@abx.hookspec(firstresult=True)
+@abx.hookimpl(trylast=True)
+def should_extract(uri: str, extractor: str, config: dict | None=None):
+    return False
+
+
+class BaseExtractor:
+    name: ExtractorName
+    binary: BinName
+
+    default_args: CmdArgsList = []
+    extra_args: CmdArgsList = []
+
+    def get_output_path(self, snapshot) -> Path:
+        return Path(self.__class__.__name__.lower())
+
+    def should_extract(self, uri: str, config: dict | None=None) -> bool:
+        try:
+            assert self.detect_installed_binary().version
+        except Exception:
+            # could not load binary
+            return False
+
+        # output_dir = self.get_output_path(snapshot)
+        # if output_dir.glob('*.*'):
+        #     return False
+        return True
+
+    # @abx.hookimpl
+    # def extract(self, snapshot_id: str) -> 
Dict[str, Any]: + # from core.models import Snapshot + # from archivebox import CONSTANTS + + # snapshot = Snapshot.objects.get(id=snapshot_id) + + # if not self.should_extract(snapshot.url): + # return {} + + # status = 'failed' + # start_ts = timezone.now() + # uplink = self.detect_network_interface() + # installed_binary = self.detect_installed_binary() + # machine = installed_binary.machine + # assert uplink.machine == installed_binary.machine # it would be *very* weird if this wasn't true + + # output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid) + # output_dir.mkdir(parents=True, exist_ok=True) + + # # execute the extractor binary with the given args + # args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args] + # cmd = [str(installed_binary.abspath), *args] + # proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir) + + # # collect the output + # end_ts = timezone.now() + # output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*')) + # stdout = proc.stdout.strip() + # stderr = proc.stderr.strip() + # output_json = None + # output_text = stdout + # try: + # output_json = json.loads(stdout.strip()) + # output_text = None + # except json.JSONDecodeError: + # pass + + # errors = [] + # if proc.returncode == 0: + # status = 'success' + # else: + # errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}') + + # # increment health stats counters + # if status == 'success': + # machine.record_health_success() + # uplink.record_health_success() + # installed_binary.record_health_success() + # else: + # machine.record_health_failure() + # uplink.record_health_failure() + # installed_binary.record_health_failure() + + # return { + # 'extractor': self.name, + + # 'snapshot': { + # 'id': snapshot.id, + # 'abid': snapshot.abid, + # 'url': snapshot.url, + # 'created_by_id': snapshot.created_by_id, + # }, + + # 'machine': { + # 'id': machine.id, + # 'abid': machine.abid, + # 'guid': machine.guid, + # 'hostname': machine.hostname, + # 'hw_in_docker': machine.hw_in_docker, + # 'hw_in_vm': machine.hw_in_vm, + # 'hw_manufacturer': machine.hw_manufacturer, + # 'hw_product': machine.hw_product, + # 'hw_uuid': machine.hw_uuid, + # 'os_arch': machine.os_arch, + # 'os_family': machine.os_family, + # 'os_platform': machine.os_platform, + # 'os_release': machine.os_release, + # 'os_kernel': machine.os_kernel, + # }, + + # 'uplink': { + # 'id': uplink.id, + # 'abid': uplink.abid, + # 'mac_address': uplink.mac_address, + # 'ip_public': uplink.ip_public, + # 'ip_local': uplink.ip_local, + # 'dns_server': uplink.dns_server, + # 'hostname': uplink.hostname, + # 'iface': uplink.iface, + # 'isp': uplink.isp, + # 'city': uplink.city, + # 'region': uplink.region, + # 'country': uplink.country, + # }, + + # 'binary': { + # 'id': installed_binary.id, + # 'abid': installed_binary.abid, + # 'name': installed_binary.name, + # 'binprovider': installed_binary.binprovider, + # 'abspath': installed_binary.abspath, + # 'version': installed_binary.version, + # 'sha256': installed_binary.sha256, + # }, + + # 'cmd': cmd, + # 'stdout': stdout, + # 'stderr': stderr, + # 'returncode': proc.returncode, + # 'start_ts': start_ts, + # 'end_ts': end_ts, + + # 'status': status, + # 'errors': errors, + # 'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)), + # 'output_files': output_files, + # 'output_json': output_json or {}, + # 'output_text': 
output_text or '', + # } + + # TODO: move this to a hookimpl + def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None): + cwd = cwd or Path(os.getcwd()) + binary = self.load_binary(installed_binary=installed_binary) + + return binary.exec(cmd=args, cwd=cwd) + + # @cached_property + @property + def BINARY(self): + # import abx.archivebox.reads + # for binary in abx.archivebox.reads.get_BINARIES().values(): + # if binary.name == self.binary: + # return binary + raise ValueError(f'Binary {self.binary} not found') + + def detect_installed_binary(self): + from machine.models import InstalledBinary + # hydrates binary from DB/cache if record of installed version is recent enough + # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host + return InstalledBinary.objects.get_from_db_or_cache(self.BINARY) + + def load_binary(self, installed_binary=None): + installed_binary = installed_binary or self.detect_installed_binary() + return installed_binary.load_from_db() + + # def detect_network_interface(self): + # from machine.models import NetworkInterface + # return NetworkInterface.objects.current() diff --git a/archivebox/pkgs/abx-spec-extractor/pyproject.toml b/archivebox/pkgs/abx-spec-extractor/pyproject.toml new file mode 100644 index 00000000..5d49fef2 --- /dev/null +++ b/archivebox/pkgs/abx-spec-extractor/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-spec-extractor" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "python-benedict>=0.26.0", + "pydantic>=2.5.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_extractor = "abx_spec_extractor" diff --git a/archivebox/pkgs/abx-spec-pydantic-pkgr/README.md b/archivebox/pkgs/abx-spec-pydantic-pkgr/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py b/archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py new file mode 100644 index 00000000..b95b3f33 --- /dev/null +++ b/archivebox/pkgs/abx-spec-pydantic-pkgr/abx_spec_pydantic_pkgr.py @@ -0,0 +1,114 @@ +__order__ = 200 + +import os + +from typing import Dict, cast +from pathlib import Path + +from pydantic_pkgr import Binary, BinProvider + +import abx + +from abx_spec_config import ConfigPluginSpec + +########################################################################################### + +class PydanticPkgrPluginSpec: + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_LIB_DIR(self) -> Path: + """Get the directory where shared runtime libraries/dependencies should be installed""" + FLAT_CONFIG = pm.hook.get_FLAT_CONFIG() + LIB_DIR = Path(FLAT_CONFIG.get('LIB_DIR', '/usr/local/share/abx')) + return LIB_DIR + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_BIN_DIR(self) -> Path: + """Get the directory where binaries should be symlinked to""" + FLAT_CONFIG = pm.hook.get_FLAT_CONFIG() + LIB_DIR = pm.hook.get_LIB_DIR() + BIN_DIR = Path(FLAT_CONFIG.get('BIN_DIR') or LIB_DIR / 'bin') + return BIN_DIR + + @abx.hookspec + @abx.hookimpl + def get_BINPROVIDERS(self) -> Dict[str, BinProvider]: + return { + # to be implemented by plugins, e.g.: + # 'npm': NpmBinProvider(npm_prefix=Path('/usr/local/share/abx/npm')), + } + + @abx.hookspec + @abx.hookimpl + def get_BINARIES(self) -> Dict[str, Binary]: + return { + # to be implemented by plugins, e.g.: + # 
'yt-dlp': Binary(name='yt-dlp', binproviders=[npm]), + } + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_BINPROVIDER(self, binprovider_name: str) -> BinProvider: + """Get a specific BinProvider by name""" + return abx.as_dict(pm.hook.get_BINPROVIDERS())[binprovider_name] + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def get_BINARY(self, bin_name: str) -> Binary: + """Get a specific Binary by name""" + return abx.as_dict(pm.hook.get_BINARIES())[bin_name] + + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def binary_load(self, binary: Binary, **kwargs) -> Binary: + """Load a binary from the filesystem (override to load a binary from a different source, e.g. DB, cache, etc.)""" + loaded_binary = binary.load(**kwargs) + pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) + return loaded_binary + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def binary_install(self, binary: Binary, **kwargs) -> Binary: + """Override to change how a binary is installed (e.g. by downloading from a remote source, etc.)""" + loaded_binary = binary.install(**kwargs) + pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) + return loaded_binary + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def binary_load_or_install(self, binary: Binary, **kwargs) -> Binary: + """Override to change how a binary is loaded or installed (e.g. by downloading from a remote source, etc.)""" + loaded_binary = binary.load_or_install(**kwargs) + pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary) + return loaded_binary + + @abx.hookspec(firstresult=True) + @abx.hookimpl + def binary_symlink_to_bin_dir(self, binary: Binary, bin_dir: Path | None=None): + if not (binary.abspath and os.path.isfile(binary.abspath)): + return + + BIN_DIR = pm.hook.get_BIN_DIR() + try: + BIN_DIR.mkdir(parents=True, exist_ok=True) + symlink = BIN_DIR / binary.name + symlink.unlink(missing_ok=True) + symlink.symlink_to(binary.abspath) + symlink.chmod(0o777) # make sure its executable by everyone + except Exception: + # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}') + # not actually needed, we can just run without it + pass + + +PLUGIN_SPEC = PydanticPkgrPluginSpec + + +class RequiredSpecsAvailable(ConfigPluginSpec, PydanticPkgrPluginSpec): + pass + +TypedPluginManager = abx.ABXPluginManager[RequiredSpecsAvailable] +pm = cast(TypedPluginManager, abx.pm) diff --git a/archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml b/archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml new file mode 100644 index 00000000..67f1f62f --- /dev/null +++ b/archivebox/pkgs/abx-spec-pydantic-pkgr/pyproject.toml @@ -0,0 +1,17 @@ +[project] +name = "abx-spec-pydantic-pkgr" +version = "0.1.0" +description = "The ABX plugin specification for Binaries and BinProviders" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "pydantic-pkgr>=0.5.4", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_pydantic_pkgr = "abx_spec_pydantic_pkgr" diff --git a/archivebox/pkgs/abx-spec-searchbackend/README.md b/archivebox/pkgs/abx-spec-searchbackend/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py b/archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py new file mode 100644 index 00000000..8bc53eb8 --- /dev/null +++ b/archivebox/pkgs/abx-spec-searchbackend/abx_spec_searchbackend.py @@ -0,0 +1,40 @@ +import abc +from typing 
import Iterable, List, Dict, cast + +import abx +from abx_spec_config import ConfigPluginSpec + + +class BaseSearchBackend(abc.ABC): + name: str + + @staticmethod + @abc.abstractmethod + def index(snapshot_id: str, texts: List[str]): + return + + @staticmethod + @abc.abstractmethod + def flush(snapshot_ids: Iterable[str]): + return + + @staticmethod + @abc.abstractmethod + def search(text: str) -> List[str]: + raise NotImplementedError("search method must be implemented by subclass") + + +class SearchBackendPluginSpec: + @abx.hookspec + @abx.hookimpl + def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]: + return {} + + +class ExpectedPluginSpec(SearchBackendPluginSpec, ConfigPluginSpec): + pass + +PLUGIN_SPEC = SearchBackendPluginSpec + +TypedPluginManager = abx.ABXPluginManager[ExpectedPluginSpec] +pm = cast(TypedPluginManager, abx.pm) diff --git a/archivebox/pkgs/abx-spec-searchbackend/pyproject.toml b/archivebox/pkgs/abx-spec-searchbackend/pyproject.toml new file mode 100644 index 00000000..2a9ac3ce --- /dev/null +++ b/archivebox/pkgs/abx-spec-searchbackend/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "abx-spec-searchbackend" +version = "0.1.0" +description = "Add your description here" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "abx>=0.1.0", + "python-benedict>=0.26.0", + "pydantic>=2.5.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project.entry-points.abx] +abx_spec_searchbackend = "abx_spec_searchbackend" diff --git a/archivebox/pkgs/abx/README.md b/archivebox/pkgs/abx/README.md new file mode 100644 index 00000000..e69de29b diff --git a/archivebox/pkgs/abx/abx.py b/archivebox/pkgs/abx/abx.py new file mode 100644 index 00000000..de4f0046 --- /dev/null +++ b/archivebox/pkgs/abx/abx.py @@ -0,0 +1,484 @@ +__package__ = 'abx' +__id__ = 'abx' +__label__ = 'ABX' +__author__ = 'Nick Sweeting' +__homepage__ = 'https://github.com/ArchiveBox' +__order__ = 0 + + +import inspect +import importlib +import itertools +from pathlib import Path +from typing import Dict, Callable, List, Set, Tuple, Iterable, Any, TypeVar, TypedDict, Type, cast, Generic, Mapping, overload, Final, ParamSpec, Literal, Protocol +from types import ModuleType +from typing_extensions import Annotated +from functools import cache + +from benedict import benedict +from pydantic import AfterValidator + +from pluggy import HookimplMarker, PluginManager, HookimplOpts, HookspecOpts, HookCaller + + + +ParamsT = ParamSpec("ParamsT") +ReturnT = TypeVar('ReturnT') + +class HookSpecDecoratorThatReturnsFirstResult(Protocol): + def __call__(self, func: Callable[ParamsT, ReturnT]) -> Callable[ParamsT, ReturnT]: ... + +class HookSpecDecoratorThatReturnsListResults(Protocol): + def __call__(self, func: Callable[ParamsT, ReturnT]) -> Callable[ParamsT, List[ReturnT]]: ... 
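+
+# Illustrative sketch of what these two Protocols buy us (ExampleSpec is
+# hypothetical, not defined anywhere in abx):
+#
+#     class ExampleSpec:
+#         @hookspec
+#         def get_label(self) -> str: ...
+#         # every plugin contributes a result -> pm.hook.get_label() is typed List[str]
+#
+#         @hookspec(firstresult=True)
+#         def get_primary_label(self) -> str: ...
+#         # first non-None result wins -> pm.hook.get_primary_label() is typed str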
+ + +class TypedHookspecMarker: + """ + Improved version of pluggy.HookspecMarker that supports type inference of hookspecs with firstresult=True|False correctly + https://github.com/pytest-dev/pluggy/issues/191 + """ + + __slots__ = ('project_name',) + + def __init__(self, project_name: str) -> None: + self.project_name: Final[str] = project_name + + # handle @hookspec(firstresult=False) -> List[ReturnT] (test_firstresult_False_hookspec) + @overload + def __call__( + self, + function: None = ..., + firstresult: Literal[False] = ..., + historic: bool = ..., + warn_on_impl: Warning | None = ..., + warn_on_impl_args: Mapping[str, Warning] | None = ..., + ) -> HookSpecDecoratorThatReturnsListResults: ... + + # handle @hookspec(firstresult=True) -> ReturnT (test_firstresult_True_hookspec) + @overload + def __call__( + self, + function: None = ..., + firstresult: Literal[True] = ..., + historic: bool = ..., + warn_on_impl: Warning | None = ..., + warn_on_impl_args: Mapping[str, Warning] | None = ..., + ) -> HookSpecDecoratorThatReturnsFirstResult: ... + + # handle @hookspec -> List[ReturnT] (test_normal_hookspec) + # order matters!!! this one has to come last + @overload + def __call__( + self, + function: Callable[ParamsT, ReturnT] = ..., + firstresult: Literal[False] = ..., + historic: bool = ..., + warn_on_impl: None = ..., + warn_on_impl_args: None = ..., + ) -> Callable[ParamsT, List[ReturnT]]: ... + + def __call__( + self, + function: Callable[ParamsT, ReturnT] | None = None, + firstresult: bool = False, + historic: bool = False, + warn_on_impl: Warning | None = None, + warn_on_impl_args: Mapping[str, Warning] | None = None, + ) -> Callable[ParamsT, List[ReturnT]] | HookSpecDecoratorThatReturnsListResults | HookSpecDecoratorThatReturnsFirstResult: + + def setattr_hookspec_opts(func) -> Callable: + if historic and firstresult: + raise ValueError("cannot have a historic firstresult hook") + opts: HookspecOpts = { + "firstresult": firstresult, + "historic": historic, + "warn_on_impl": warn_on_impl, + "warn_on_impl_args": warn_on_impl_args, + } + setattr(func, self.project_name + "_spec", opts) + return func + + if function is not None: + return setattr_hookspec_opts(function) + else: + return setattr_hookspec_opts + + + + +spec = hookspec = TypedHookspecMarker("abx") +impl = hookimpl = HookimplMarker("abx") + + +def is_valid_attr_name(x: str) -> str: + assert x.isidentifier() and not x.startswith('_') + return x + +def is_valid_module_name(x: str) -> str: + assert x.isidentifier() and not x.startswith('_') and x.islower() + return x + +AttrName = Annotated[str, AfterValidator(is_valid_attr_name)] +PluginId = Annotated[str, AfterValidator(is_valid_module_name)] + + +class PluginInfo(TypedDict, total=True): + id: PluginId + package: AttrName + label: str + version: str + author: str + homepage: str + dependencies: List[str] + + source_code: str + hooks: Dict[AttrName, Callable] + module: ModuleType + + + +PluginSpec = TypeVar("PluginSpec") + +class ABXPluginManager(PluginManager, Generic[PluginSpec]): + """ + Patch to fix pluggy's PluginManager to work with pydantic models. 
+ See: https://github.com/pytest-dev/pluggy/pull/536 + """ + + # enable static type checking of pm.hook.call() calls + # https://stackoverflow.com/a/62871889/2156113 + # https://github.com/pytest-dev/pluggy/issues/191 + hook: PluginSpec + + def create_typed_hookcaller(self, name: str, module_or_class: Type[PluginSpec], spec_opts: HookspecOpts) -> HookCaller: + """ + create a new HookCaller subclass with a modified __signature__ + so that the return type is correct and args are converted to kwargs + """ + TypedHookCaller = type('TypedHookCaller', (HookCaller,), {}) + + hookspec_signature = inspect.signature(getattr(module_or_class, name)) + hookspec_return_type = hookspec_signature.return_annotation + + # replace return type with list if firstresult=False + hookcall_return_type = hookspec_return_type if spec_opts['firstresult'] else List[hookspec_return_type] + + # replace each arg with kwarg equivalent (pm.hook.call() only accepts kwargs) + args_as_kwargs = [ + param.replace(kind=inspect.Parameter.KEYWORD_ONLY) if param.name != 'self' else param + for param in hookspec_signature.parameters.values() + ] + TypedHookCaller.__signature__ = hookspec_signature.replace(parameters=args_as_kwargs, return_annotation=hookcall_return_type) + TypedHookCaller.__name__ = f'{name}_HookCaller' + + return TypedHookCaller(name, self._hookexec, module_or_class, spec_opts) + + def add_hookspecs(self, module_or_class: Type[PluginSpec]) -> None: + """Add HookSpecs from the given class, (generic type allows us to enforce types of pm.hook.call() statically)""" + names = [] + for name in dir(module_or_class): + spec_opts = self.parse_hookspec_opts(module_or_class, name) + if spec_opts is not None: + hc: HookCaller | None = getattr(self.hook, name, None) + if hc is None: + hc = self.create_typed_hookcaller(name, module_or_class, spec_opts) + setattr(self.hook, name, hc) + else: + # Plugins registered this hook without knowing the spec. 
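+                    # e.g. (hypothetical ordering) pm.register(plugin) ran before
+                    # pm.add_hookspecs(SomeSpec), so the plugin's hookimpls were collected
+                    # without a spec; back-fill the spec and re-verify them below: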
+                    hc.set_specification(module_or_class, spec_opts)
+                    for hookfunction in hc.get_hookimpls():
+                        self._verify_hook(hc, hookfunction)
+                names.append(name)
+
+        if not names:
+            raise ValueError(
+                f"did not find any {self.project_name!r} hooks in {module_or_class!r}"
+            )
+
+    def parse_hookimpl_opts(self, plugin, name: str) -> HookimplOpts | None:
+        # IMPORTANT: @property methods can have side effects, and are never hookimpl
+        # if attr is a property, skip it in advance
+        # plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
+        if isinstance(getattr(plugin, name, None), property):
+            return None
+
+        try:
+            return super().parse_hookimpl_opts(plugin, name)
+        except AttributeError:
+            return None
+
+
+pm = ABXPluginManager("abx")
+
+
+
+def get_plugin_order(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int, Path]:
+    assert plugin
+    plugin_module = None
+    plugin_dir = None
+
+    if isinstance(plugin, str) or isinstance(plugin, Path):
+        if str(plugin).endswith('.py'):
+            plugin_dir = Path(plugin).parent
+        elif '/' in str(plugin):
+            # assume it's a path to a plugin directory
+            plugin_dir = Path(plugin)
+        elif str(plugin).isidentifier():
+            # bare plugin id: import it so we can resolve its module dir below
+            plugin_module = importlib.import_module(str(plugin))
+            plugin_dir = Path(str(plugin_module.__file__)).parent
+
+    elif inspect.ismodule(plugin):
+        plugin_module = plugin
+        plugin_dir = Path(str(plugin_module.__file__)).parent
+    elif inspect.isclass(plugin):
+        plugin_module = plugin
+        plugin_dir = Path(inspect.getfile(plugin)).parent
+    else:
+        raise ValueError(f'Invalid plugin, cannot get order: {plugin}')
+
+    if plugin_dir:
+        try:
+            # if .plugin_order file exists, use it to set the load priority
+            order = int((plugin_dir / '.plugin_order').read_text())
+            assert -1000000 < order < 100000000
+            return (order, plugin_dir)
+        except FileNotFoundError:
+            pass
+
+    if plugin_module:
+        order = getattr(plugin_module, '__order__', 999)
+    else:
+        order = 999
+
+    assert order is not None
+    assert plugin_dir
+
+    return (order, plugin_dir)
+
+
+# @cache
+def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo:
+    assert plugin
+
+    # import the plugin module by its name
+    if isinstance(plugin, str):
+        module = importlib.import_module(plugin)
+        # print('IMPORTED PLUGIN:', plugin)
+        plugin = getattr(module, 'PLUGIN_SPEC', getattr(module, 'PLUGIN', module))
+    elif inspect.ismodule(plugin):
+        module = plugin
+        plugin = getattr(module, 'PLUGIN_SPEC', getattr(module, 'PLUGIN', module))
+    elif inspect.isclass(plugin):
+        module = inspect.getmodule(plugin)
+    else:
+        raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
+
+    assert module
+
+    plugin_file = Path(inspect.getfile(module))
+    plugin_package = module.__package__ or module.__name__
+    plugin_id = plugin_package.replace('.', '_')
+
+    # load the plugin info from the plugin/__init__.py __attr__s if they exist
+    plugin_module_attrs = {
+        'label': getattr(module, '__label__', plugin_id),
+        'version': getattr(module, '__version__', '0.0.1'),
+        'author': getattr(module, '__author__', 'ArchiveBox'),
+        'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox'),
+        'dependencies': getattr(module, '__dependencies__', []),
+    }
+
+    # load the plugin info from the plugin/pyproject.toml file if it has one
+    plugin_toml_info = {}
+    try:
+        # try loading ./pyproject.toml first in case the plugin is a bare python file not inside a package dir
+        plugin_toml_info = benedict.from_toml((plugin_file.parent / 'pyproject.toml').read_text()).project
+    except Exception:
+        try:
+            # try loading ../pyproject.toml next in case the plugin is in a package dir
+            plugin_toml_info = benedict.from_toml((plugin_file.parent.parent / 'pyproject.toml').read_text()).project
+        except Exception:
+            # print('WARNING: could not detect pyproject.toml for PLUGIN:', plugin_id, plugin_file.parent, 'ERROR:', e)
+            pass
+
+
+    assert plugin_id
+    assert plugin_package
+    assert module.__file__
+
+    # merge the plugin info from all sources + add dynamically calculated info
+    return cast(PluginInfo, benedict(PluginInfo(**{
+        'id': plugin_id,
+        **plugin_module_attrs,
+        **plugin_toml_info,
+        'package': plugin_package,
+        'source_code': module.__file__,
+        'order': get_plugin_order(plugin),
+        'hooks': get_plugin_hooks(plugin),
+        'module': module,
+        'plugin': plugin,
+    })))
+
+
+def get_all_plugins() -> Dict[PluginId, PluginInfo]:
+    """Get the metadata for all the plugins registered with Pluggy."""
+    plugins = {}
+    for plugin_module in pm.get_plugins():
+        plugin_info = get_plugin(plugin=plugin_module)
+        assert 'id' in plugin_info
+        plugins[plugin_info['id']] = plugin_info
+    return benedict(plugins)
+
+
+def get_all_hook_names() -> Set[str]:
+    """Get a set of all hook names across all plugins"""
+    return {
+        hook_name
+        for plugin_module in pm.get_plugins()
+        for hook_name in get_plugin_hooks(plugin_module)
+    }
+
+
+def get_all_hook_specs() -> Dict[str, Dict[str, Any]]:
+    """Get a set of all hookspec methods defined in all plugins (useful for type checking if a pm.hook.call() is valid)"""
+    hook_specs = {}
+
+    for hook_name in get_all_hook_names():
+        for plugin_module in pm.get_plugins():
+            if hasattr(plugin_module, hook_name):
+                hookspecopts = pm.parse_hookspec_opts(plugin_module, hook_name)
+                if hookspecopts:
+                    method = getattr(plugin_module, hook_name)
+                    signature = inspect.signature(method)
+                    return_type = signature.return_annotation if signature.return_annotation != inspect._empty else None
+
+                    if not hookspecopts.get('firstresult'):
+                        # if not firstresult, pm.hook.call() returns a sequence of results (one per plugin)
+                        return_type = List[return_type]
+
+                    call_signature = signature.replace(return_annotation=return_type)
+                    # bind hook_name as a default arg so each wrapper calls its own hook
+                    # (a bare closure here would late-bind to the last hook_name in the loop)
+                    method = lambda *args, _hook_name=hook_name, **kwargs: getattr(pm.hook, _hook_name)(*args, **kwargs)
+                    method.__signature__ = call_signature
+                    method.__name__ = hook_name
+                    method.__package__ = plugin_module.__package__
+
+                    hook_specs[hook_name] = {
+                        'name': hook_name,
+                        'method': method,
+                        'signature': call_signature,
+                        'hookspec_opts': hookspecopts,
+                        'hookspec_signature': signature,
+                        'hookspec_plugin': plugin_module.__package__,
+                    }
+    return hook_specs
+
+
+
+###### PLUGIN DISCOVERY AND LOADING ########################################################
+
+
+def find_plugins_in_dir(plugins_dir: Path) -> Dict[PluginId, Path]:
+    """
+    Find all the plugins in a given directory. Just looks for an __init__.py file.
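+
+    e.g. (hypothetical layout): find_plugins_in_dir(Path('user_plugins')) returns
+    {'abx_plugin_example': Path('user_plugins/abx_plugin_example')}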
+ """ + python_dirs = plugins_dir.glob("*/__init__.py") + sorted_python_dirs = sorted(python_dirs, key=lambda p: get_plugin_order(plugin=p) or 500) + + return { + plugin_entrypoint.parent.name: plugin_entrypoint.parent + for plugin_entrypoint in sorted_python_dirs + if plugin_entrypoint.parent.name not in ('abx', 'core') + } + + +def get_pip_installed_plugins(group: PluginId='abx') -> Dict[PluginId, Path]: + """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip""" + import importlib.metadata + + DETECTED_PLUGINS = {} # module_name: module_dir_path + for dist in list(importlib.metadata.distributions()): + for entrypoint in dist.entry_points: + if entrypoint.group != group or pm.is_blocked(entrypoint.name): + continue + DETECTED_PLUGINS[entrypoint.name] = Path(entrypoint.load().__file__).parent + # pm.register(plugin, name=ep.name) + # pm._plugin_distinfo.append((plugin, DistFacade(dist))) + return DETECTED_PLUGINS + + + +# Load all plugins from pip packages, archivebox built-ins, and user plugins +def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId, Path]): + """ + Load all the plugins from a dictionary of module names and directory paths. + """ + PLUGINS_TO_LOAD = [] + LOADED_PLUGINS = {} + + for plugin in plugins: + plugin_info = get_plugin(plugin) + assert plugin_info, f'No plugin metadata found for {plugin}' + assert 'id' in plugin_info and 'module' in plugin_info + if plugin_info['module'] in pm.get_plugins(): + LOADED_PLUGINS[plugin_info['id']] = plugin_info + continue + else: + PLUGINS_TO_LOAD.append(plugin_info) + + PLUGINS_TO_LOAD = sorted(PLUGINS_TO_LOAD, key=lambda x: x['order']) + + for plugin_info in PLUGINS_TO_LOAD: + pm.register(plugin_info['module']) + LOADED_PLUGINS[plugin_info['id']] = plugin_info + # print(f' √ Loaded plugin: {plugin_id}') + return benedict(LOADED_PLUGINS) + +@cache +def get_plugin_hooks(plugin: PluginId | ModuleType | Type | None) -> Dict[AttrName, Callable]: + """Get all the functions marked with @hookimpl on a module.""" + if not plugin: + return {} + + hooks = {} + + if isinstance(plugin, str): + plugin_module = importlib.import_module(plugin) + elif inspect.ismodule(plugin) or inspect.isclass(plugin): + plugin_module = plugin + else: + raise ValueError(f'Invalid plugin, cannot get hooks: {plugin}') + + for attr_name in dir(plugin_module): + if attr_name.startswith('_'): + continue + try: + attr = getattr(plugin_module, attr_name) + if isinstance(attr, Callable): + if pm.parse_hookimpl_opts(plugin_module, attr_name): + hooks[attr_name] = attr + except Exception as e: + print(f'Error getting hookimpls for {plugin}: {e}') + + return hooks + +ReturnT = TypeVar('ReturnT') + +def as_list(results: List[List[ReturnT]]) -> List[ReturnT]: + """Flatten a list of lists returned by a pm.hook.call() into a single list""" + return list(itertools.chain(*results)) + + +def as_dict(results: List[Dict[PluginId, ReturnT]]) -> Dict[PluginId, ReturnT]: + """Flatten a list of dicts returned by a pm.hook.call() into a single dict""" + + if isinstance(results, (dict, benedict)): + results_list = results.values() + else: + results_list = results + + return benedict({ + result_id: result + for plugin_results in results_list + for result_id, result in plugin_results.items() + }) diff --git a/archivebox/pkgs/abx/pyproject.toml b/archivebox/pkgs/abx/pyproject.toml new file mode 100644 index 00000000..3c185653 --- /dev/null +++ b/archivebox/pkgs/abx/pyproject.toml @@ -0,0 +1,14 @@ +[project] +name = "abx" 
+version = "0.1.0" +description = "The common shared interfaces for the ABX ArchiveBox plugin ecosystem." +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "pluggy>=1.5.0", + "django>=5.1.1,<6.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" diff --git a/archivebox/plugins_extractor/archivedotorg/__init__.py b/archivebox/plugins_extractor/archivedotorg/__init__.py deleted file mode 100644 index a5c24932..00000000 --- a/archivebox/plugins_extractor/archivedotorg/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -__package__ = 'plugins_extractor.archivedotorg' -__label__ = 'archivedotorg' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://archive.org' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'archivedotorg': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import ARCHIVEDOTORG_CONFIG - - return { - 'archivedotorg': ARCHIVEDOTORG_CONFIG - } - - -# @abx.hookimpl -# def get_EXTRACTORS(): -# from .extractors import ARCHIVEDOTORG_EXTRACTOR -# -# return { -# 'archivedotorg': ARCHIVEDOTORG_EXTRACTOR, -# } diff --git a/archivebox/plugins_extractor/chrome/__init__.py b/archivebox/plugins_extractor/chrome/__init__.py deleted file mode 100644 index 016cd292..00000000 --- a/archivebox/plugins_extractor/chrome/__init__.py +++ /dev/null @@ -1,54 +0,0 @@ -__package__ = 'plugins_extractor.chrome' -__id__ = 'chrome' -__label__ = 'Chrome' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/main/archivebox/plugins_extractor/chrome' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import CHROME_CONFIG - - return { - __id__: CHROME_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import CHROME_BINARY - - return { - 'chrome': CHROME_BINARY, - } - -@abx.hookimpl -def ready(): - from .config import CHROME_CONFIG - CHROME_CONFIG.validate() - - -# @abx.hookimpl -# def get_EXTRACTORS(): -# return { -# 'pdf': PDF_EXTRACTOR, -# 'screenshot': SCREENSHOT_EXTRACTOR, -# 'dom': DOM_EXTRACTOR, -# } diff --git a/archivebox/plugins_extractor/curl/__init__.py b/archivebox/plugins_extractor/curl/__init__.py deleted file mode 100644 index 99af0107..00000000 --- a/archivebox/plugins_extractor/curl/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -__package__ = 'plugins_extractor.curl' -__label__ = 'curl' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/curl/curl' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'curl': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import CURL_CONFIG - - return { - 'curl': CURL_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import CURL_BINARY - - return { - 'curl': CURL_BINARY, - } diff --git a/archivebox/plugins_extractor/favicon/__init__.py 
b/archivebox/plugins_extractor/favicon/__init__.py deleted file mode 100644 index 3fa84560..00000000 --- a/archivebox/plugins_extractor/favicon/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -__package__ = 'plugins_extractor.favicon' -__label__ = 'favicon' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/archivebox' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'favicon': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import FAVICON_CONFIG - - return { - 'favicon': FAVICON_CONFIG - } - - -# @abx.hookimpl -# def get_EXTRACTORS(): -# from .extractors import FAVICON_EXTRACTOR - -# return { -# 'favicon': FAVICON_EXTRACTOR, -# } diff --git a/archivebox/plugins_extractor/git/__init__.py b/archivebox/plugins_extractor/git/__init__.py deleted file mode 100644 index db18919f..00000000 --- a/archivebox/plugins_extractor/git/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -__package__ = 'plugins_extractor.git' -__label__ = 'git' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/git/git' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'git': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import GIT_CONFIG - - return { - 'git': GIT_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import GIT_BINARY - - return { - 'git': GIT_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import GIT_EXTRACTOR - - return { - 'git': GIT_EXTRACTOR, - } diff --git a/archivebox/plugins_extractor/git/extractors.py b/archivebox/plugins_extractor/git/extractors.py deleted file mode 100644 index 350f1b82..00000000 --- a/archivebox/plugins_extractor/git/extractors.py +++ /dev/null @@ -1,17 +0,0 @@ -__package__ = 'plugins_extractor.git' - -from pathlib import Path - -from abx.archivebox.base_extractor import BaseExtractor, ExtractorName - -from .binaries import GIT_BINARY - - -class GitExtractor(BaseExtractor): - name: ExtractorName = 'git' - binary: str = GIT_BINARY.name - - def get_output_path(self, snapshot) -> Path | None: - return snapshot.as_link() / 'git' - -GIT_EXTRACTOR = GitExtractor() diff --git a/archivebox/plugins_extractor/htmltotext/__init__.py b/archivebox/plugins_extractor/htmltotext/__init__.py deleted file mode 100644 index 0f2b756c..00000000 --- a/archivebox/plugins_extractor/htmltotext/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -__package__ = 'plugins_extractor.htmltotext' -__id__ = 'htmltotext' -__label__ = 'HTML-to-Text' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/archivebox' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import HTMLTOTEXT_CONFIG - - return { - __id__: HTMLTOTEXT_CONFIG - } - - -# @abx.hookimpl -# def get_EXTRACTORS(): -# from .extractors import FAVICON_EXTRACTOR - -# return { -# 'htmltotext': 
FAVICON_EXTRACTOR, -# } diff --git a/archivebox/plugins_extractor/mercury/__init__.py b/archivebox/plugins_extractor/mercury/__init__.py deleted file mode 100644 index 10aca671..00000000 --- a/archivebox/plugins_extractor/mercury/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -__package__ = 'plugins_extractor.mercury' -__label__ = 'mercury' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/postlight/mercury-parser' -__dependencies__ = ['npm'] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'mercury': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import MERCURY_CONFIG - - return { - 'mercury': MERCURY_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import MERCURY_BINARY - - return { - 'mercury': MERCURY_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import MERCURY_EXTRACTOR - - return { - 'mercury': MERCURY_EXTRACTOR, - } diff --git a/archivebox/plugins_extractor/mercury/extractors.py b/archivebox/plugins_extractor/mercury/extractors.py deleted file mode 100644 index 5d91b0e0..00000000 --- a/archivebox/plugins_extractor/mercury/extractors.py +++ /dev/null @@ -1,19 +0,0 @@ -__package__ = 'plugins_extractor.mercury' - -from pathlib import Path - -from abx.archivebox.base_extractor import BaseExtractor, ExtractorName - -from .binaries import MERCURY_BINARY - - - -class MercuryExtractor(BaseExtractor): - name: ExtractorName = 'mercury' - binary: str = MERCURY_BINARY.name - - def get_output_path(self, snapshot) -> Path | None: - return snapshot.link_dir / 'mercury' / 'content.html' - - -MERCURY_EXTRACTOR = MercuryExtractor() diff --git a/archivebox/plugins_extractor/pocket/__init__.py b/archivebox/plugins_extractor/pocket/__init__.py deleted file mode 100644 index bf09435f..00000000 --- a/archivebox/plugins_extractor/pocket/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -__package__ = 'plugins_extractor.pocket' -__id__ = 'pocket' -__label__ = 'pocket' -__version__ = '2024.10.21' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/pocket' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import POCKET_CONFIG - - return { - __id__: POCKET_CONFIG - } - -@abx.hookimpl -def ready(): - from .config import POCKET_CONFIG - POCKET_CONFIG.validate() diff --git a/archivebox/plugins_extractor/readability/__init__.py b/archivebox/plugins_extractor/readability/__init__.py deleted file mode 100644 index 2ef1a1a8..00000000 --- a/archivebox/plugins_extractor/readability/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -__package__ = 'plugins_extractor.readability' -__label__ = 'readability' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/readability-extractor' -__dependencies__ = ['npm'] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'readability': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl 
-def get_CONFIG(): - from .config import READABILITY_CONFIG - - return { - 'readability': READABILITY_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import READABILITY_BINARY - - return { - 'readability': READABILITY_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import READABILITY_EXTRACTOR - - return { - 'readability': READABILITY_EXTRACTOR, - } diff --git a/archivebox/plugins_extractor/readability/extractors.py b/archivebox/plugins_extractor/readability/extractors.py deleted file mode 100644 index eb8ea165..00000000 --- a/archivebox/plugins_extractor/readability/extractors.py +++ /dev/null @@ -1,20 +0,0 @@ -__package__ = 'plugins_extractor.readability' - -from pathlib import Path - -from pydantic_pkgr import BinName - -from abx.archivebox.base_extractor import BaseExtractor - -from .binaries import READABILITY_BINARY - - -class ReadabilityExtractor(BaseExtractor): - name: str = 'readability' - binary: BinName = READABILITY_BINARY.name - - def get_output_path(self, snapshot) -> Path: - return Path(snapshot.link_dir) / 'readability' / 'content.html' - - -READABILITY_EXTRACTOR = ReadabilityExtractor() diff --git a/archivebox/plugins_extractor/readwise/__init__.py b/archivebox/plugins_extractor/readwise/__init__.py deleted file mode 100644 index 002eb58b..00000000 --- a/archivebox/plugins_extractor/readwise/__init__.py +++ /dev/null @@ -1,37 +0,0 @@ -__package__ = 'plugins_extractor.readwise' -__id__ = 'readwise' -__label__ = 'readwise' -__version__ = '2024.10.21' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import READWISE_CONFIG - - return { - __id__: READWISE_CONFIG - } - -@abx.hookimpl -def ready(): - from .config import READWISE_CONFIG - READWISE_CONFIG.validate() diff --git a/archivebox/plugins_extractor/readwise/config.py b/archivebox/plugins_extractor/readwise/config.py deleted file mode 100644 index 106aaf06..00000000 --- a/archivebox/plugins_extractor/readwise/config.py +++ /dev/null @@ -1,17 +0,0 @@ -__package__ = 'plugins_extractor.readwise' - -from typing import Dict -from pathlib import Path - -from pydantic import Field - -from abx.archivebox.base_configset import BaseConfigSet - -from archivebox.config import CONSTANTS - - -class ReadwiseConfig(BaseConfigSet): - READWISE_DB_PATH: Path = Field(default=CONSTANTS.SOURCES_DIR / "readwise_reader_api.db") - READWISE_READER_TOKENS: Dict[str, str] = Field(default=lambda: {}) # {: , ...} - -READWISE_CONFIG = ReadwiseConfig() diff --git a/archivebox/plugins_extractor/singlefile/__init__.py b/archivebox/plugins_extractor/singlefile/__init__.py deleted file mode 100644 index cd72adb8..00000000 --- a/archivebox/plugins_extractor/singlefile/__init__.py +++ /dev/null @@ -1,51 +0,0 @@ -__package__ = 'plugins_extractor.singlefile' -__label__ = 'singlefile' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/gildas-lormeau/singlefile' -__dependencies__ = ['npm'] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'singlefile': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 
'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import SINGLEFILE_CONFIG - - return { - 'singlefile': SINGLEFILE_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import SINGLEFILE_BINARY - - return { - 'singlefile': SINGLEFILE_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import SINGLEFILE_EXTRACTOR - - return { - 'singlefile': SINGLEFILE_EXTRACTOR, - } - -# @abx.hookimpl -# def get_INSTALLED_APPS(): -# # needed to load ./models.py -# return [__package__] diff --git a/archivebox/plugins_extractor/singlefile/extractors.py b/archivebox/plugins_extractor/singlefile/extractors.py deleted file mode 100644 index fedbe801..00000000 --- a/archivebox/plugins_extractor/singlefile/extractors.py +++ /dev/null @@ -1,19 +0,0 @@ -__package__ = 'plugins_extractor.singlefile' - -from pathlib import Path - -from pydantic_pkgr import BinName -from abx.archivebox.base_extractor import BaseExtractor - -from .binaries import SINGLEFILE_BINARY - - -class SinglefileExtractor(BaseExtractor): - name: str = 'singlefile' - binary: BinName = SINGLEFILE_BINARY.name - - def get_output_path(self, snapshot) -> Path: - return Path(snapshot.link_dir) / 'singlefile.html' - - -SINGLEFILE_EXTRACTOR = SinglefileExtractor() diff --git a/archivebox/plugins_extractor/wget/__init__.py b/archivebox/plugins_extractor/wget/__init__.py deleted file mode 100644 index e2a36aa4..00000000 --- a/archivebox/plugins_extractor/wget/__init__.py +++ /dev/null @@ -1,54 +0,0 @@ -__package__ = 'plugins_extractor.wget' -__id__ = 'wget' -__label__ = 'WGET' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/wget' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - __id__: { - 'id': __id__, - 'package': __package__, - 'label': __label__, - 'version': __version__, - 'author': __author__, - 'homepage': __homepage__, - 'dependencies': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import WGET_CONFIG - - return { - __id__: WGET_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import WGET_BINARY - - return { - 'wget': WGET_BINARY, - } - -@abx.hookimpl -def get_EXTRACTORS(): - from .extractors import WGET_EXTRACTOR, WARC_EXTRACTOR - - return { - 'wget': WGET_EXTRACTOR, - 'warc': WARC_EXTRACTOR, - } - -@abx.hookimpl -def ready(): - from .config import WGET_CONFIG - WGET_CONFIG.validate() diff --git a/archivebox/plugins_extractor/wget/extractors.py b/archivebox/plugins_extractor/wget/extractors.py deleted file mode 100644 index 86fa3923..00000000 --- a/archivebox/plugins_extractor/wget/extractors.py +++ /dev/null @@ -1,37 +0,0 @@ -__package__ = 'plugins_extractor.wget' - -from pathlib import Path - -from pydantic_pkgr import BinName - -from abx.archivebox.base_extractor import BaseExtractor, ExtractorName - -from .binaries import WGET_BINARY -from .wget_util import wget_output_path - -class WgetExtractor(BaseExtractor): - name: ExtractorName = 'wget' - binary: BinName = WGET_BINARY.name - - def get_output_path(self, snapshot) -> Path | None: - wget_index_path = wget_output_path(snapshot.as_link()) - if wget_index_path: - return Path(wget_index_path) - return None - -WGET_EXTRACTOR = WgetExtractor() - - -class WarcExtractor(BaseExtractor): - name: ExtractorName = 'warc' - binary: BinName = WGET_BINARY.name - - def get_output_path(self, snapshot) -> Path | None: - 
warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz')) - if warc_files: - return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0] - return None - - -WARC_EXTRACTOR = WarcExtractor() - diff --git a/archivebox/plugins_pkg/npm/binproviders.py b/archivebox/plugins_pkg/npm/binproviders.py deleted file mode 100644 index b1b83168..00000000 --- a/archivebox/plugins_pkg/npm/binproviders.py +++ /dev/null @@ -1,42 +0,0 @@ -__package__ = 'plugins_pkg.npm' - -from pathlib import Path -from typing import Optional - -from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName - -from archivebox.config import DATA_DIR, CONSTANTS - -from abx.archivebox.base_binary import BaseBinProvider - - - -OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin' -NEW_NODE_BIN_PATH = CONSTANTS.DEFAULT_LIB_DIR / 'npm' / 'node_modules' / '.bin' - - -class SystemNpmBinProvider(NpmProvider, BaseBinProvider): - name: BinProviderName = "sys_npm" - - npm_prefix: Optional[Path] = None - - -class LibNpmBinProvider(NpmProvider, BaseBinProvider): - name: BinProviderName = "lib_npm" - PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}' - - npm_prefix: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'npm' - - def setup(self) -> None: - # update paths from config if they arent the default - from archivebox.config.common import STORAGE_CONFIG - if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR: - self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm' - self.PATH = f'{STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}' - - super().setup() - - -SYS_NPM_BINPROVIDER = SystemNpmBinProvider() -LIB_NPM_BINPROVIDER = LibNpmBinProvider() -npm = LIB_NPM_BINPROVIDER diff --git a/archivebox/plugins_pkg/puppeteer/__init__.py b/archivebox/plugins_pkg/puppeteer/__init__.py deleted file mode 100644 index 7acc5b1b..00000000 --- a/archivebox/plugins_pkg/puppeteer/__init__.py +++ /dev/null @@ -1,46 +0,0 @@ -__package__ = 'plugins_pkg.puppeteer' -__label__ = 'puppeteer' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/puppeteer/puppeteer' -__dependencies__ = ['npm'] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'puppeteer': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import PUPPETEER_CONFIG - - return { - 'puppeteer': PUPPETEER_CONFIG - } - -@abx.hookimpl -def get_BINARIES(): - from .binaries import PUPPETEER_BINARY - - return { - 'puppeteer': PUPPETEER_BINARY, - } - -@abx.hookimpl -def get_BINPROVIDERS(): - from .binproviders import PUPPETEER_BINPROVIDER - - return { - 'puppeteer': PUPPETEER_BINPROVIDER, - } diff --git a/archivebox/plugins_search/ripgrep/__init__.py b/archivebox/plugins_search/ripgrep/__init__.py deleted file mode 100644 index ac1e417c..00000000 --- a/archivebox/plugins_search/ripgrep/__init__.py +++ /dev/null @@ -1,48 +0,0 @@ -__package__ = 'plugins_search.ripgrep' -__label__ = 'ripgrep' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/BurntSushi/ripgrep' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'ripgrep': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config 
import RIPGREP_CONFIG - - return { - 'ripgrep': RIPGREP_CONFIG - } - - -@abx.hookimpl -def get_BINARIES(): - from .binaries import RIPGREP_BINARY - - return { - 'ripgrep': RIPGREP_BINARY - } - - -@abx.hookimpl -def get_SEARCHBACKENDS(): - from .searchbackend import RIPGREP_SEARCH_BACKEND - - return { - 'ripgrep': RIPGREP_SEARCH_BACKEND, - } diff --git a/archivebox/plugins_search/sqlitefts/__init__.py b/archivebox/plugins_search/sqlitefts/__init__.py deleted file mode 100644 index 63fb1b12..00000000 --- a/archivebox/plugins_search/sqlitefts/__init__.py +++ /dev/null @@ -1,39 +0,0 @@ -__package__ = 'plugins_search.sqlitefts' -__label__ = 'sqlitefts' -__version__ = '2024.10.14' -__author__ = 'ArchiveBox' -__homepage__ = 'https://github.com/ArchiveBox/archivebox' -__dependencies__ = [] - -import abx - - -@abx.hookimpl -def get_PLUGIN(): - return { - 'sqlitefts': { - 'PACKAGE': __package__, - 'LABEL': __label__, - 'VERSION': __version__, - 'AUTHOR': __author__, - 'HOMEPAGE': __homepage__, - 'DEPENDENCIES': __dependencies__, - } - } - -@abx.hookimpl -def get_CONFIG(): - from .config import SQLITEFTS_CONFIG - - return { - 'sqlitefts': SQLITEFTS_CONFIG - } - - -@abx.hookimpl -def get_SEARCHBACKENDS(): - from .searchbackend import SQLITEFTS_SEARCH_BACKEND - - return { - 'sqlitefts': SQLITEFTS_SEARCH_BACKEND, - } diff --git a/archivebox/queues/supervisor_util.py b/archivebox/queues/supervisor_util.py index f181da08..0a4285f8 100644 --- a/archivebox/queues/supervisor_util.py +++ b/archivebox/queues/supervisor_util.py @@ -26,6 +26,23 @@ CONFIG_FILE_NAME = "supervisord.conf" PID_FILE_NAME = "supervisord.pid" WORKERS_DIR_NAME = "workers" +SCHEDULER_WORKER = { + "name": "worker_scheduler", + "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --disable-health-check --flush-locks", + "autostart": "true", + "autorestart": "true", + "stdout_logfile": "logs/worker_scheduler.log", + "redirect_stderr": "true", +} +COMMAND_WORKER = { + "name": "worker_commands", + "command": "archivebox manage djangohuey --queue commands -w 4 -k thread --no-periodic --disable-health-check", + "autostart": "true", + "autorestart": "true", + "stdout_logfile": "logs/worker_commands.log", + "redirect_stderr": "true", +} + @cache def get_sock_file(): """Get the path to the supervisord socket file, symlinking to a shorter path if needed due to unix path length limits""" @@ -84,33 +101,35 @@ files = {WORKERS_DIR}/*.conf """ CONFIG_FILE.write_text(config_content) - Path.mkdir(WORKERS_DIR, exist_ok=True) + Path.mkdir(WORKERS_DIR, exist_ok=True, parents=True) + (WORKERS_DIR / 'initial_startup.conf').write_text('') # hides error about "no files found to include" when supervisord starts def create_worker_config(daemon): + """Create a supervisord worker config file for a given daemon""" SOCK_FILE = get_sock_file() WORKERS_DIR = SOCK_FILE.parent / WORKERS_DIR_NAME - Path.mkdir(WORKERS_DIR, exist_ok=True) + Path.mkdir(WORKERS_DIR, exist_ok=True, parents=True) name = daemon['name'] - configfile = WORKERS_DIR / f"{name}.conf" + worker_conf = WORKERS_DIR / f"{name}.conf" - config_content = f"[program:{name}]\n" + worker_str = f"[program:{name}]\n" for key, value in daemon.items(): if key == 'name': continue - config_content += f"{key}={value}\n" - config_content += "\n" + worker_str += f"{key}={value}\n" + worker_str += "\n" - configfile.write_text(config_content) + worker_conf.write_text(worker_str) def get_existing_supervisord_process(): SOCK_FILE = get_sock_file() try: transport = SupervisorTransport(None, None, 
f"unix://{SOCK_FILE}") - server = ServerProxy("http://localhost", transport=transport) + server = ServerProxy("http://localhost", transport=transport) # user:pass@localhost doesn't work for some reason with unix://.sock, cant seem to silence CRIT no-auth warning current_state = cast(Dict[str, int | str], server.supervisor.getState()) if current_state["statename"] == "RUNNING": pid = server.supervisor.getPID() @@ -127,6 +146,7 @@ def stop_existing_supervisord_process(): PID_FILE = SOCK_FILE.parent / PID_FILE_NAME try: + # if pid file exists, load PID int try: pid = int(PID_FILE.read_text()) except (FileNotFoundError, ValueError): @@ -136,15 +156,15 @@ def stop_existing_supervisord_process(): print(f"[🦸‍♂️] Stopping supervisord process (pid={pid})...") proc = psutil.Process(pid) proc.terminate() - proc.wait() - except (Exception, BrokenPipeError, IOError): + proc.wait(timeout=5) + except (BaseException, BrokenPipeError, IOError, KeyboardInterrupt): pass finally: try: # clear PID file and socket file PID_FILE.unlink(missing_ok=True) get_sock_file().unlink(missing_ok=True) - except Exception: + except BaseException: pass def start_new_supervisord_process(daemonize=False): @@ -278,47 +298,6 @@ def start_worker(supervisor, daemon, lazy=False): raise Exception(f"Failed to start worker {daemon['name']}! Only found: {procs}") -def watch_worker(supervisor, daemon_name, interval=5): - """loop continuously and monitor worker's health""" - while True: - proc = get_worker(supervisor, daemon_name) - if not proc: - raise Exception("Worker dissapeared while running! " + daemon_name) - - if proc['statename'] == 'STOPPED': - return proc - - if proc['statename'] == 'RUNNING': - time.sleep(1) - continue - - if proc['statename'] in ('STARTING', 'BACKOFF', 'FATAL', 'EXITED', 'STOPPING'): - print(f'[🦸‍♂️] WARNING: Worker {daemon_name} {proc["statename"]} {proc["description"]}') - time.sleep(interval) - continue - -def tail_worker_logs(log_path: str): - get_or_create_supervisord_process(daemonize=False) - - from rich.live import Live - from rich.table import Table - - table = Table() - table.add_column("TS") - table.add_column("URL") - - try: - with Live(table, refresh_per_second=1) as live: # update 4 times a second to feel fluid - with open(log_path, 'r') as f: - for line in follow(f): - if '://' in line: - live.console.print(f"Working on: {line.strip()}") - # table.add_row("123124234", line.strip()) - except (KeyboardInterrupt, BrokenPipeError, IOError): - STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...") - except SystemExit: - pass - def get_worker(supervisor, daemon_name): try: return supervisor.getProcessInfo(daemon_name) @@ -350,28 +329,55 @@ def stop_worker(supervisor, daemon_name): raise Exception(f"Failed to stop worker {daemon_name}!") +def tail_worker_logs(log_path: str): + get_or_create_supervisord_process(daemonize=False) + + from rich.live import Live + from rich.table import Table + + table = Table() + table.add_column("TS") + table.add_column("URL") + + try: + with Live(table, refresh_per_second=1) as live: # update 4 times a second to feel fluid + with open(log_path, 'r') as f: + for line in follow(f): + if '://' in line: + live.console.print(f"Working on: {line.strip()}") + # table.add_row("123124234", line.strip()) + except (KeyboardInterrupt, BrokenPipeError, IOError): + STDERR.print("\n[🛑] Got Ctrl+C, stopping gracefully...") + except SystemExit: + pass + +def watch_worker(supervisor, daemon_name, interval=5): + """loop continuously and monitor worker's health""" + while True: + proc = 
get_worker(supervisor, daemon_name) + if not proc: + raise Exception("Worker dissapeared while running! " + daemon_name) + + if proc['statename'] == 'STOPPED': + return proc + + if proc['statename'] == 'RUNNING': + time.sleep(1) + continue + + if proc['statename'] in ('STARTING', 'BACKOFF', 'FATAL', 'EXITED', 'STOPPING'): + print(f'[🦸‍♂️] WARNING: Worker {daemon_name} {proc["statename"]} {proc["description"]}') + time.sleep(interval) + continue + def start_server_workers(host='0.0.0.0', port='8000', daemonize=False): supervisor = get_or_create_supervisord_process(daemonize=daemonize) bg_workers = [ - { - "name": "worker_scheduler", - "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --disable-health-check --flush-locks", - "autostart": "true", - "autorestart": "true", - "stdout_logfile": "logs/worker_scheduler.log", - "redirect_stderr": "true", - }, - { - "name": "worker_system_tasks", - "command": "archivebox manage djangohuey --queue system_tasks -w 4 -k thread --no-periodic --disable-health-check", - "autostart": "true", - "autorestart": "true", - "stdout_logfile": "logs/worker_system_tasks.log", - "redirect_stderr": "true", - }, + SCHEDULER_WORKER, + COMMAND_WORKER, ] fg_worker = { "name": "worker_daphne", diff --git a/archivebox/queues/tasks.py b/archivebox/queues/tasks.py index acfeab0b..6f62a8c1 100644 --- a/archivebox/queues/tasks.py +++ b/archivebox/queues/tasks.py @@ -1,7 +1,7 @@ __package__ = 'archivebox.queues' from functools import wraps -from django.utils import timezone +# from django.utils import timezone from django_huey import db_task, task @@ -10,7 +10,7 @@ from huey_monitor.tqdm import ProcessInfo from .supervisor_util import get_or_create_supervisord_process -# @db_task(queue="system_tasks", context=True, schedule=1) +# @db_task(queue="commands", context=True, schedule=1) # def scheduler_tick(): # print('SCHEDULER TICK', timezone.now().isoformat()) # # abx.archivebox.events.on_scheduler_runloop_start(timezone.now(), machine=Machine.objects.get_current_machine()) @@ -43,7 +43,7 @@ def db_task_with_parent(func): return wrapper -@db_task(queue="system_tasks", context=True) +@db_task(queue="commands", context=True) def bg_add(add_kwargs, task=None, parent_task_id=None): get_or_create_supervisord_process(daemonize=False) @@ -62,7 +62,7 @@ def bg_add(add_kwargs, task=None, parent_task_id=None): return result -@task(queue="system_tasks", context=True) +@task(queue="commands", context=True) def bg_archive_links(args, kwargs=None, task=None, parent_task_id=None): get_or_create_supervisord_process(daemonize=False) @@ -83,7 +83,7 @@ def bg_archive_links(args, kwargs=None, task=None, parent_task_id=None): return result -@task(queue="system_tasks", context=True) +@task(queue="commands", context=True) def bg_archive_link(args, kwargs=None,task=None, parent_task_id=None): get_or_create_supervisord_process(daemonize=False) @@ -104,7 +104,7 @@ def bg_archive_link(args, kwargs=None,task=None, parent_task_id=None): return result -@task(queue="system_tasks", context=True) +@task(queue="commands", context=True) def bg_archive_snapshot(snapshot, overwrite=False, methods=None, task=None, parent_task_id=None): # get_or_create_supervisord_process(daemonize=False) diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index 2e7d4f69..921c074f 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -6,8 +6,8 @@ from typing import List, Union from django.db.models import QuerySet from django.conf import settings -import 
abx.archivebox.reads - +import abx +import archivebox from archivebox.index.schema import Link from archivebox.misc.util import enforce_types from archivebox.misc.logging import stderr @@ -57,7 +57,7 @@ def get_indexable_content(results: QuerySet): def import_backend(): - for backend in abx.archivebox.reads.get_SEARCHBACKENDS().values(): + for backend in abx.as_dict(archivebox.pm.hook.get_SEARCHBACKENDS()).values(): if backend.name == SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE: return backend raise Exception(f'Could not load {SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE} as search backend') diff --git a/archivebox/seeds/models.py b/archivebox/seeds/models.py index b0d83b2e..7fe49c83 100644 --- a/archivebox/seeds/models.py +++ b/archivebox/seeds/models.py @@ -1,19 +1,8 @@ __package__ = 'archivebox.seeds' -from datetime import datetime - -from django_stubs_ext.db.models import TypedModelMeta - from django.db import models -from django.db.models import Q -from django.core.validators import MaxValueValidator, MinValueValidator from django.conf import settings -from django.utils import timezone -from django.utils.functional import cached_property -from django.urls import reverse_lazy - -from pathlib import Path from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats @@ -47,7 +36,10 @@ class Seed(ABIDModel, ModelWithHealthStats): abid_rand_src = 'self.id' abid_drift_allowed = True - uri = models.URLField(max_length=255, blank=False, null=False, unique=True) # unique source location where URLs will be loaded from + id = models.UUIDField(primary_key=True, default=None, null=False, editable=False, unique=True, verbose_name='ID') + abid = ABIDField(prefix=abid_prefix) + + uri = models.URLField(max_length=2000, blank=False, null=False) # unique source location where URLs will be loaded from extractor = models.CharField(default='auto', max_length=32) # suggested extractor to use to load this URL source tags_str = models.CharField(max_length=255, null=False, blank=True, default='') # tags to attach to any URLs that come from this source @@ -64,4 +56,10 @@ class Seed(ABIDModel, ModelWithHealthStats): # pocketapi:// # s3:// # etc.. 
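
The one-line change just below swaps `self.uri.split('://')` for `self.uri.split('://', 1)` in `Seed.get_source_type()`. Since only element `[0]` is used, both forms return the same scheme; `maxsplit=1` simply stops scanning after the first `://`, so a URI that happens to contain the separator again later (e.g. inside a query string) is not split repeatedly. A standalone sketch, not ArchiveBox code, illustrating the difference:

```python
# Hypothetical demo of why maxsplit=1 is preferable in get_source_type().
uri = "pocketapi://user@example.com?next=https://example.com/feed"

# Both expressions yield the scheme ('pocketapi')...
assert uri.split('://')[0].lower() == "pocketapi"
assert uri.split('://', 1)[0].lower() == "pocketapi"

# ...but only maxsplit=1 avoids also splitting on the 'https://'
# embedded in the query string:
print(len(uri.split('://')))     # 3 -- splits on every occurrence
print(len(uri.split('://', 1)))  # 2 -- scheme + everything after it
```
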
- return self.uri.split('://')[0].lower() + return self.uri.split('://', 1)[0].lower() + + class Meta: + verbose_name = 'Seed' + verbose_name_plural = 'Seeds' + + unique_together = (('created_by', 'uri', 'extractor'),) diff --git a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py deleted file mode 100644 index a997acbb..00000000 --- a/archivebox/vendor/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -import sys -import importlib -from pathlib import Path - -VENDOR_DIR = Path(__file__).parent - -VENDORED_LIBS = { - # sys.path dir: library name - #'python-atomicwrites': 'atomicwrites', - #'django-taggit': 'taggit', - 'pydantic-pkgr': 'pydantic_pkgr', - 'pocket': 'pocket', - #'base32-crockford': 'base32_crockford', -} - -def load_vendored_libs(): - for lib_subdir, lib_name in VENDORED_LIBS.items(): - lib_dir = VENDOR_DIR / lib_subdir - assert lib_dir.is_dir(), 'Expected vendor libary {lib_name} could not be found in {lib_dir}' - - try: - lib = importlib.import_module(lib_name) - # print(f"Successfully imported lib from environment {lib_name}: {inspect.getfile(lib)}") - except ImportError: - sys.path.append(str(lib_dir)) - try: - lib = importlib.import_module(lib_name) - # print(f"Successfully imported lib from vendored fallback {lib_name}: {inspect.getfile(lib)}") - except ImportError as e: - print(f"Failed to import lib from environment or vendored fallback {lib_name}: {e}", file=sys.stderr) - sys.exit(1) - - diff --git a/archivebox/vendor/pocket b/archivebox/vendor/pocket deleted file mode 160000 index e7970b63..00000000 --- a/archivebox/vendor/pocket +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5 diff --git a/archivebox/vendor/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr deleted file mode 160000 index a116eaef..00000000 --- a/archivebox/vendor/pydantic-pkgr +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a116eaef7f090dc872b18e82b5a538313075ded6 diff --git a/archivebox/vendor/requirements.txt b/archivebox/vendor/requirements.txt deleted file mode 100644 index 43be87c2..00000000 --- a/archivebox/vendor/requirements.txt +++ /dev/null @@ -1,8 +0,0 @@ -# this folder contains vendored versions of these packages - -#atomicwrites==1.4.0 -#pocket==0.3.7 -pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7 -#django-taggit==1.3.0 -#base32-crockford==0.3.0 -pydantic-pkgr>=0.4.7 diff --git a/click_test.py b/click_test.py new file mode 100644 index 00000000..4e4a0e40 --- /dev/null +++ b/click_test.py @@ -0,0 +1,30 @@ +import sys +import click +from rich import print +from archivebox.config.django import setup_django + +setup_django() + + +def parse_stdin_to_args(io=sys.stdin): + for line in io.read().split('\n'): + for url_or_id in line.split(' '): + if url_or_id.strip(): + yield url_or_id.strip() + + +# Gather data from stdin in case using a pipe +if not sys.stdin.isatty(): + sys.argv += parse_stdin_to_args(sys.stdin) + + +@click.command() +@click.argument("snapshot_ids_or_urls", type=str, nargs=-1) +def extract(snapshot_ids_or_urls): + for url_or_snapshot_id in snapshot_ids_or_urls: + print('- EXTRACTING', url_or_snapshot_id, file=sys.stderr) + for result in archivebox.pm.hook.extract(url_or_snapshot_id): + print(result) + +if __name__ == "__main__": + extract() diff --git a/pyproject.toml b/pyproject.toml index f692da81..aceae950 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "archivebox" -version = "0.8.5rc53" +version = "0.8.6rc0" requires-python = ">=3.10" description = "Self-hosted internet archiving 
solution." authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}] @@ -39,13 +39,14 @@ classifiers = [ dependencies = [ - # ... archivebox/vendor/* # see vendored libs here + # ... archivebox/pkgs/* # see vendored libs here ############# Django / Core Libraries ############# "setuptools>=74.1.0", "django>=5.1.1,<6.0", "django-ninja>=1.3.0", "django-extensions>=3.2.3", "mypy-extensions>=1.0.0", + "typing_extensions>=4.12.2", "channels[daphne]>=4.1.0", "django-signal-webhooks>=0.3.0", "django-admin-data-views>=0.4.1", @@ -60,7 +61,7 @@ dependencies = [ "pluggy>=1.5.0", "requests>=2.32.3", "dateparser>=1.2.0", - "tzdata>=2024.2", # needed for dateparser {TZ: UTC} on some systems: https://github.com/ArchiveBox/ArchiveBox/issues/1553 + "tzdata>=2024.2", # needed for dateparser {TZ: UTC} on some systems: https://github.com/ArchiveBox/ArchiveBox/issues/1553 "feedparser>=6.0.11", "w3lib>=2.2.1", "rich>=13.8.0", @@ -69,20 +70,51 @@ dependencies = [ "typeid-python>=0.3.1", "psutil>=6.0.0", "supervisor>=4.2.5", - "python-crontab>=3.2.0", # for: archivebox schedule - "croniter>=3.0.3", # for: archivebox schedule - "ipython>=8.27.0", # for: archivebox shell - "py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid + "python-crontab>=3.2.0", # for: archivebox schedule + "croniter>=3.0.3", # for: archivebox schedule + "ipython>=8.27.0", # for: archivebox shell + "py-machineid>=0.6.0", # for: machine/detect.py calculating machine guid "python-benedict[io,parse]>=0.33.2", "pydantic-settings>=2.5.2", "atomicwrites==1.4.1", "django-taggit==6.1.0", "base32-crockford==0.3.0", - # "pocket@git+https://github.com/tapanpandita/pocket.git@v0.3.7", + "platformdirs>=4.3.6", "pydantic-pkgr>=0.5.4", - ############# Plugin Dependencies ################ + "pocket>=0.3.6", "sonic-client>=1.0.0", - "yt-dlp>=2024.8.6", # for: media" + "yt-dlp>=2024.8.6", # for: media" + ############# Plugin Dependencies ################ + "abx>=0.1.0", + "abx-spec-pydantic-pkgr>=0.1.0", + "abx-spec-config>=0.1.0", + "abx-spec-archivebox>=0.1.0", + "abx-spec-django>=0.1.0", + "abx-spec-extractor>=0.1.0", + "abx-spec-searchbackend>=0.1.0", + "abx-plugin-default-binproviders>=2024.10.24", + "abx-plugin-pip>=2024.10.24", + "abx-plugin-npm>=2024.10.24", + "abx-plugin-playwright>=2024.10.24", + "abx-plugin-puppeteer>=2024.10.28", + "abx-plugin-ripgrep-search>=2024.10.28", + "abx-plugin-sqlitefts-search>=2024.10.28", + "abx-plugin-sonic-search>=2024.10.28", + "abx-plugin-ldap-auth>=2024.10.28", + "abx-plugin-curl>=2024.10.27", + "abx-plugin-wget>=2024.10.28", + "abx-plugin-git>=2024.10.28", + "abx-plugin-chrome>=2024.10.28", + "abx-plugin-ytdlp>=2024.10.28", + "abx-plugin-title>=2024.10.27", + "abx-plugin-favicon>=2024.10.27", + # "abx-plugin-headers>=2024.10.27", + "abx-plugin-archivedotorg>=2024.10.28", + "abx-plugin-singlefile>=2024.10.28", + "abx-plugin-readability>=2024.10.28", + "abx-plugin-mercury>=2024.10.28", + "abx-plugin-htmltotext>=2024.10.28", + "python-statemachine>=2.3.6", ] [project.optional-dependencies] @@ -113,7 +145,7 @@ all = [ [tool.uv] dev-dependencies = [ ### BUILD - "uv", + "uv>=0.4.26", "pip>=24.2", "setuptools>=75.1.0", "wheel>=0.44.0", @@ -121,14 +153,17 @@ dev-dependencies = [ #"homebrew-pypi-poet>=0.10.0", # for: generating archivebox.rb brewfile list of python packages ### DOCS "recommonmark>=0.7.1", - "sphinx", + "sphinx>=8.1.3", "sphinx-rtd-theme>=2.0.0", ### DEBUGGING - "archivebox[debug]", + "django-debug-toolbar>=4.4.6", + "requests-tracker>=0.3.3", + 
"djdt_flamegraph>=0.2.13", + "ipdb>=0.13.13", "logfire[django]>=0.51.0", "opentelemetry-instrumentation-django>=0.47b0", "opentelemetry-instrumentation-sqlite3>=0.47b0", - "viztracer", # usage: viztracer ../.venv/bin/archivebox manage check + "viztracer>=0.17.0", # usage: viztracer ../.venv/bin/archivebox manage check # "snakeviz", # usage: python -m cProfile -o flamegraph.prof ../.venv/bin/archivebox manage check ### TESTING "pytest>=8.3.3", @@ -139,8 +174,47 @@ dev-dependencies = [ "mypy>=1.11.2", ] +[tool.uv.sources] +# pydantic-pkgr = { workspace = true } + +abx = { workspace = true } +abx-spec-pydantic-pkgr = { workspace = true } +abx-spec-config = { workspace = true } +abx-spec-archivebox = { workspace = true } +abx-spec-django = { workspace = true } +abx-spec-extractor = { workspace = true } +abx-spec-searchbackend = { workspace = true } + +abx-plugin-default-binproviders = { workspace = true } +abx-plugin-pip = { workspace = true } +abx-plugin-npm = { workspace = true } +abx-plugin-playwright = { workspace = true } +abx-plugin-puppeteer = { workspace = true } +abx-plugin-ripgrep-search = { workspace = true } +abx-plugin-sqlitefts-search = { workspace = true } +abx-plugin-sonic-search = { workspace = true } +abx-plugin-ldap-auth = { workspace = true } + +abx-plugin-curl = { workspace = true } +abx-plugin-wget = { workspace = true } +abx-plugin-git = { workspace = true } +abx-plugin-chrome = { workspace = true } +abx-plugin-ytdlp = { workspace = true } + +abx-plugin-title = { workspace = true } +abx-plugin-favicon = { workspace = true } +# abx-plugin-headers = { workspace = true } +abx-plugin-archivedotorg = { workspace = true } + +abx-plugin-singlefile = { workspace = true } +abx-plugin-readability = { workspace = true } +abx-plugin-mercury = { workspace = true } +abx-plugin-htmltotext = { workspace = true } + + [tool.uv.workspace] -members = ["packages/*"] +members = ["archivebox/pkgs/*"] +exclude = ["archivebox/pkgs/__pycache__"] [build-system] requires = ["pdm-backend"] @@ -155,7 +229,7 @@ package-dir = {"archivebox" = "archivebox"} line-length = 140 target-version = "py310" src = ["archivebox"] -exclude = ["*.pyi", "typings/", "migrations/", "vendor/"] +exclude = ["*.pyi", "typings/", "migrations/"] # https://docs.astral.sh/ruff/rules/ [tool.ruff.lint] @@ -190,7 +264,6 @@ exclude = [ "**/node_modules", "**/__pycache__", "**/migrations", - "archivebox/vendor", ] stubPath = "./archivebox/typings" venvPath = "." 
diff --git a/requirements.txt b/requirements.txt index f9a37b4b..cf5cbb48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,137 @@ # This file was autogenerated by uv via the following command: # uv pip compile pyproject.toml --all-extras -o requirements.txt +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx + # via + # archivebox (pyproject.toml) + # abx-plugin-archivedotorg + # abx-plugin-chrome + # abx-plugin-curl + # abx-plugin-default-binproviders + # abx-plugin-favicon + # abx-plugin-git + # abx-plugin-htmltotext + # abx-plugin-ldap-auth + # abx-plugin-mercury + # abx-plugin-npm + # abx-plugin-pip + # abx-plugin-playwright + # abx-plugin-puppeteer + # abx-plugin-readability + # abx-plugin-ripgrep-search + # abx-plugin-singlefile + # abx-plugin-sonic-search + # abx-plugin-sqlitefts-search + # abx-plugin-title + # abx-plugin-wget + # abx-plugin-ytdlp + # abx-spec-archivebox + # abx-spec-config + # abx-spec-django + # abx-spec-extractor + # abx-spec-pydantic-pkgr + # abx-spec-searchbackend +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-archivedotorg + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-chrome + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-curl + # via + # archivebox (pyproject.toml) + # abx-plugin-archivedotorg + # abx-plugin-favicon + # abx-plugin-title +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-default-binproviders + # via + # archivebox (pyproject.toml) + # abx-plugin-git + # abx-plugin-npm + # abx-plugin-pip +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-favicon + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-git + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-htmltotext + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ldap-auth + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-mercury + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-npm + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-pip + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-playwright + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-puppeteer + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-readability + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ripgrep-search + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-singlefile + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sonic-search + # via archivebox (pyproject.toml) +-e 
file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sqlitefts-search + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-title + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-wget + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ytdlp + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-archivebox + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-config + # via + # archivebox (pyproject.toml) + # abx-plugin-archivedotorg + # abx-plugin-chrome + # abx-plugin-curl + # abx-plugin-favicon + # abx-plugin-git + # abx-plugin-htmltotext + # abx-plugin-ldap-auth + # abx-plugin-mercury + # abx-plugin-npm + # abx-plugin-pip + # abx-plugin-playwright + # abx-plugin-puppeteer + # abx-plugin-readability + # abx-plugin-ripgrep-search + # abx-plugin-singlefile + # abx-plugin-sonic-search + # abx-plugin-sqlitefts-search + # abx-plugin-title + # abx-plugin-wget + # abx-plugin-ytdlp +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-django + # via + # archivebox (pyproject.toml) + # abx-plugin-ldap-auth +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-extractor + # via archivebox (pyproject.toml) +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-pydantic-pkgr + # via + # archivebox (pyproject.toml) + # abx-plugin-chrome + # abx-plugin-curl + # abx-plugin-default-binproviders + # abx-plugin-git + # abx-plugin-npm + # abx-plugin-pip + # abx-plugin-playwright + # abx-plugin-puppeteer + # abx-plugin-singlefile + # abx-plugin-sonic-search + # abx-plugin-wget + # abx-plugin-ytdlp +-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-searchbackend + # via + # archivebox (pyproject.toml) + # abx-plugin-ripgrep-search + # abx-plugin-sonic-search + # abx-plugin-sqlitefts-search annotated-types==0.7.0 # via pydantic anyio==4.6.2.post1 @@ -29,9 +161,9 @@ beautifulsoup4==4.12.3 # via python-benedict brotli==1.1.0 # via yt-dlp -bx-django-utils==79 +bx-django-utils==81 # via django-huey-monitor -bx-py-utils==104 +bx-py-utils==105 # via # bx-django-utils # django-huey-monitor @@ -49,7 +181,7 @@ charset-normalizer==3.4.0 # via requests constantly==23.10.4 # via twisted -croniter==3.0.3 +croniter==5.0.1 # via archivebox (pyproject.toml) cryptography==43.0.3 # via @@ -62,15 +194,23 @@ daphne==4.1.2 dateparser==1.2.0 # via archivebox (pyproject.toml) decorator==5.1.1 - # via ipython + # via + # ipdb + # ipython django==5.1.2 # via # archivebox (pyproject.toml) + # abx + # abx-plugin-pip + # abx-spec-archivebox + # abx-spec-django # bx-django-utils # channels # django-admin-data-views # django-auth-ldap + # django-autotyping # django-charid-field + # django-debug-toolbar # django-extensions # django-huey # django-huey-monitor @@ -81,12 +221,17 @@ django==5.1.2 # django-stubs # django-stubs-ext # django-taggit -django-admin-data-views==0.4.1 + # requests-tracker +django-admin-data-views==0.4.2 # via archivebox (pyproject.toml) django-auth-ldap==5.1.0 # via archivebox (pyproject.toml) +django-autotyping==0.5.1 + # via archivebox (pyproject.toml) django-charid-field==0.4 # via archivebox (pyproject.toml) 
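
The `# via` annotations uv emits (visible throughout the requirements.txt diff here) make the file double as a reverse-dependency index: each pinned package is followed by the project dependencies that pulled it in. A throwaway sketch, not part of the repo, that folds those comments into a dict for quick inspection:

```python
# Hypothetical helper: parse uv's "# via" annotations from requirements.txt
# into {package: [things that require it, ...]}.
from collections import defaultdict

def parse_via_comments(path="requirements.txt"):
    required_by = defaultdict(list)
    current = None
    with open(path) as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                continue
            if stripped.startswith("# via"):
                # single-line form: "# via python-benedict"
                rest = stripped.removeprefix("# via").strip()
                if rest and current:
                    required_by[current].append(rest)
            elif stripped.startswith("#"):
                # continuation lines under a "# via" block: "#   django"
                if current:
                    required_by[current].append(stripped.lstrip("# "))
            else:
                # a requirement line like "sqlparse==0.5.1" or an editable entry
                current = stripped.split("==")[0].split("@")[0].strip()
    return dict(required_by)

# e.g. parse_via_comments()["sqlparse"] ->
#   ['django', 'django-debug-toolbar', 'requests-tracker']
```
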
+django-debug-toolbar==4.4.6 + # via archivebox (pyproject.toml) django-extensions==3.2.3 # via archivebox (pyproject.toml) django-huey==1.2.1 @@ -101,25 +246,27 @@ django-object-actions==4.3.0 # via archivebox (pyproject.toml) django-pydantic-field==0.3.10 # via archivebox (pyproject.toml) -django-settings-holder==0.1.2 +django-settings-holder==0.2.2 # via # django-admin-data-views # django-signal-webhooks -django-signal-webhooks==0.3.0 +django-signal-webhooks==0.3.1 # via archivebox (pyproject.toml) -django-stubs==5.1.0 +django-stubs==5.1.1 # via archivebox (pyproject.toml) -django-stubs-ext==5.1.0 +django-stubs-ext==5.1.1 # via django-stubs django-taggit==6.1.0 # via archivebox (pyproject.toml) -et-xmlfile==1.1.0 +djdt-flamegraph==0.2.13 + # via archivebox (pyproject.toml) +et-xmlfile==2.0.0 # via openpyxl executing==2.1.0 # via stack-data feedparser==6.0.11 # via archivebox (pyproject.toml) -ftfy==6.3.0 +ftfy==6.3.1 # via python-benedict h11==0.14.0 # via httpcore @@ -144,10 +291,16 @@ idna==3.10 # twisted incremental==24.7.2 # via twisted -ipython==8.28.0 +ipdb==0.13.13 # via archivebox (pyproject.toml) +ipython==8.29.0 + # via + # archivebox (pyproject.toml) + # ipdb jedi==0.19.1 # via ipython +libcst==1.5.0 + # via django-autotyping mailchecker==6.0.11 # via python-benedict markdown-it-py==3.0.0 @@ -166,11 +319,17 @@ parso==0.8.4 # via jedi pexpect==4.9.0 # via ipython -phonenumbers==8.13.47 +phonenumbers==8.13.48 # via python-benedict platformdirs==4.3.6 - # via pydantic-pkgr + # via + # archivebox (pyproject.toml) + # pydantic-pkgr pluggy==1.5.0 + # via + # archivebox (pyproject.toml) + # abx +pocket==0.3.6 # via archivebox (pyproject.toml) prompt-toolkit==3.0.48 # via ipython @@ -197,6 +356,10 @@ pycryptodomex==3.21.0 # via yt-dlp pydantic==2.9.2 # via + # abx-plugin-playwright + # abx-spec-config + # abx-spec-extractor + # abx-spec-searchbackend # django-ninja # django-pydantic-field # pydantic-pkgr @@ -206,9 +369,21 @@ pydantic-core==2.23.4 # pydantic # pydantic-pkgr pydantic-pkgr==0.5.4 - # via archivebox (pyproject.toml) -pydantic-settings==2.6.0 - # via archivebox (pyproject.toml) + # via + # archivebox (pyproject.toml) + # abx-plugin-default-binproviders + # abx-plugin-npm + # abx-plugin-pip + # abx-plugin-playwright + # abx-plugin-puppeteer + # abx-plugin-singlefile + # abx-plugin-sonic-search + # abx-plugin-ytdlp + # abx-spec-pydantic-pkgr +pydantic-settings==2.6.1 + # via + # archivebox (pyproject.toml) + # abx-spec-config pygments==2.18.0 # via # ipython @@ -216,7 +391,11 @@ pygments==2.18.0 pyopenssl==24.2.1 # via twisted python-benedict==0.34.0 - # via archivebox (pyproject.toml) + # via + # archivebox (pyproject.toml) + # abx-spec-config + # abx-spec-extractor + # abx-spec-searchbackend python-crontab==3.2.0 # via archivebox (pyproject.toml) python-dateutil==2.9.0.post0 @@ -242,23 +421,29 @@ pytz==2024.2 # croniter # dateparser pyyaml==6.0.2 - # via python-benedict + # via + # libcst + # python-benedict regex==2024.9.11 # via dateparser requests==2.32.3 # via # archivebox (pyproject.toml) + # pocket # python-benedict # yt-dlp -rich==13.9.2 +requests-tracker==0.3.3 + # via archivebox (pyproject.toml) +rich==13.9.4 # via # archivebox (pyproject.toml) + # abx-spec-config # rich-argparse -rich-argparse==1.5.2 +rich-argparse==1.6.0 # via archivebox (pyproject.toml) -service-identity==24.1.0 +service-identity==24.2.0 # via twisted -setuptools==75.2.0 +setuptools==75.3.0 # via # archivebox (pyproject.toml) # autobahn @@ -280,7 +465,10 @@ sonic-client==1.0.0 soupsieve==2.6 # 
via beautifulsoup4 sqlparse==0.5.1 - # via django + # via + # django + # django-debug-toolbar + # requests-tracker stack-data==0.6.3 # via ipython supervisor==4.2.5 @@ -293,7 +481,7 @@ traitlets==5.14.3 # via # ipython # matplotlib-inline -twisted==24.7.0 +twisted==24.10.0 # via daphne txaio==23.1.1 # via autobahn @@ -303,6 +491,7 @@ types-pyyaml==6.0.12.20240917 # via django-stubs typing-extensions==4.12.2 # via + # archivebox (pyproject.toml) # django-pydantic-field # django-stubs # django-stubs-ext @@ -310,6 +499,8 @@ typing-extensions==4.12.2 # pydantic-core # pydantic-pkgr # twisted +tzdata==2024.2 + # via archivebox (pyproject.toml) tzlocal==5.2 # via dateparser ulid-py==1.1.0 @@ -332,7 +523,7 @@ xlrd==2.0.1 # via python-benedict xmltodict==0.14.2 # via python-benedict -yt-dlp==2024.10.7 +yt-dlp==2024.10.22 # via archivebox (pyproject.toml) -zope-interface==7.1.0 +zope-interface==7.1.1 # via twisted diff --git a/uv.lock b/uv.lock index 1436d2f1..761668b7 100644 --- a/uv.lock +++ b/uv.lock @@ -6,6 +6,565 @@ resolution-markers = [ "python_full_version >= '3.13'", ] +[manifest] +members = [ + "abx", + "abx-plugin-archivedotorg", + "abx-plugin-chrome", + "abx-plugin-curl", + "abx-plugin-default-binproviders", + "abx-plugin-favicon", + "abx-plugin-git", + "abx-plugin-htmltotext", + "abx-plugin-ldap-auth", + "abx-plugin-mercury", + "abx-plugin-npm", + "abx-plugin-pip", + "abx-plugin-playwright", + "abx-plugin-pocket", + "abx-plugin-puppeteer", + "abx-plugin-readability", + "abx-plugin-readwise", + "abx-plugin-ripgrep-search", + "abx-plugin-singlefile", + "abx-plugin-sonic-search", + "abx-plugin-sqlitefts-search", + "abx-plugin-title", + "abx-plugin-wget", + "abx-plugin-ytdlp", + "abx-spec-archivebox", + "abx-spec-config", + "abx-spec-django", + "abx-spec-extractor", + "abx-spec-pydantic-pkgr", + "abx-spec-searchbackend", + "archivebox", +] + +[[package]] +name = "abx" +version = "0.1.0" +source = { editable = "archivebox/pkgs/abx" } +dependencies = [ + { name = "django" }, + { name = "pluggy" }, +] + +[package.metadata] +requires-dist = [ + { name = "django", specifier = ">=5.1.1,<6.0" }, + { name = "pluggy", specifier = ">=1.5.0" }, +] + +[[package]] +name = "abx-plugin-archivedotorg" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-archivedotorg" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-curl" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-chrome" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-chrome" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-curl" +version = "2024.10.24" +source = { editable = "archivebox/pkgs/abx-plugin-curl" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable 
= "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-default-binproviders" +version = "2024.10.24" +source = { editable = "archivebox/pkgs/abx-plugin-default-binproviders" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-plugin-favicon" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-favicon" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-curl" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-git" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-git" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-default-binproviders" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-htmltotext" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-htmltotext" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-ldap-auth" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-ldap-auth" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-django" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-django", editable = "archivebox/pkgs/abx-spec-django" }, +] + +[[package]] +name = "abx-plugin-mercury" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-mercury" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-npm" +version = "2024.10.24" +source = { editable = "archivebox/pkgs/abx-plugin-npm" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-default-binproviders" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-default-binproviders", editable = 
"archivebox/pkgs/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-plugin-pip" +version = "2024.10.24" +source = { editable = "archivebox/pkgs/abx-plugin-pip" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-default-binproviders" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "django" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "django", specifier = ">=5.0.0" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-plugin-playwright" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-playwright" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "pydantic", specifier = ">=2.4.2" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-plugin-pocket" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-pocket" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "pocket" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "pocket", specifier = ">=0.3.6" }, +] + +[[package]] +name = "abx-plugin-puppeteer" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-puppeteer" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-plugin-readability" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-readability" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-readwise" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-readwise" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = 
"archivebox/pkgs/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-ripgrep-search" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-ripgrep-search" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-searchbackend" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" }, +] + +[[package]] +name = "abx-plugin-singlefile" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-singlefile" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-plugin-sonic-search" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-sonic-search" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-plugin-sqlitefts-search" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-sqlitefts-search" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-searchbackend" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" }, +] + +[[package]] +name = "abx-plugin-title" +version = "2024.10.27" +source = { editable = "archivebox/pkgs/abx-plugin-title" } +dependencies = [ + { name = "abx" }, + { name = "abx-plugin-curl" }, + { name = "abx-spec-config" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, +] + +[[package]] +name = "abx-plugin-wget" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-wget" } +dependencies = [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, +] + +[[package]] +name = "abx-plugin-ytdlp" +version = "2024.10.28" +source = { editable = "archivebox/pkgs/abx-plugin-ytdlp" } +dependencies 
= [ + { name = "abx" }, + { name = "abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" }, + { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-spec-archivebox" +version = "0.1.0" +source = { editable = "archivebox/pkgs/abx-spec-archivebox" } +dependencies = [ + { name = "abx" }, + { name = "django" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "django", specifier = ">=5.1.1,<6.0" }, +] + +[[package]] +name = "abx-spec-config" +version = "0.1.0" +source = { editable = "archivebox/pkgs/abx-spec-config" } +dependencies = [ + { name = "abx" }, + { name = "pydantic" }, + { name = "pydantic-settings" }, + { name = "python-benedict" }, + { name = "rich" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "pydantic", specifier = ">=2.9.2" }, + { name = "pydantic-settings", specifier = ">=2.6.0" }, + { name = "python-benedict", specifier = ">=0.34.0" }, + { name = "rich", specifier = ">=13.9.3" }, +] + +[[package]] +name = "abx-spec-django" +version = "0.1.0" +source = { editable = "archivebox/pkgs/abx-spec-django" } +dependencies = [ + { name = "abx" }, + { name = "django" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "django", specifier = ">=5.1.1,<6.0" }, +] + +[[package]] +name = "abx-spec-extractor" +version = "0.1.0" +source = { editable = "archivebox/pkgs/abx-spec-extractor" } +dependencies = [ + { name = "abx" }, + { name = "pydantic" }, + { name = "python-benedict" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "pydantic", specifier = ">=2.5.0" }, + { name = "python-benedict", specifier = ">=0.26.0" }, +] + +[[package]] +name = "abx-spec-pydantic-pkgr" +version = "0.1.0" +source = { editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" } +dependencies = [ + { name = "abx" }, + { name = "pydantic-pkgr" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "pydantic-pkgr", specifier = ">=0.5.4" }, +] + +[[package]] +name = "abx-spec-searchbackend" +version = "0.1.0" +source = { editable = "archivebox/pkgs/abx-spec-searchbackend" } +dependencies = [ + { name = "abx" }, + { name = "pydantic" }, + { name = "python-benedict" }, +] + +[package.metadata] +requires-dist = [ + { name = "abx", editable = "archivebox/pkgs/abx" }, + { name = "pydantic", specifier = ">=2.5.0" }, + { name = "python-benedict", specifier = ">=0.26.0" }, +] + [[package]] name = "alabaster" version = "1.0.0" @@ -41,9 +600,37 @@ wheels = [ [[package]] name = "archivebox" -version = "0.8.5rc53" +version = "0.8.6rc0" source = { editable = "." 
}
 dependencies = [
+    { name = "abx" },
+    { name = "abx-plugin-archivedotorg" },
+    { name = "abx-plugin-chrome" },
+    { name = "abx-plugin-curl" },
+    { name = "abx-plugin-default-binproviders" },
+    { name = "abx-plugin-favicon" },
+    { name = "abx-plugin-git" },
+    { name = "abx-plugin-htmltotext" },
+    { name = "abx-plugin-ldap-auth" },
+    { name = "abx-plugin-mercury" },
+    { name = "abx-plugin-npm" },
+    { name = "abx-plugin-pip" },
+    { name = "abx-plugin-playwright" },
+    { name = "abx-plugin-puppeteer" },
+    { name = "abx-plugin-readability" },
+    { name = "abx-plugin-ripgrep-search" },
+    { name = "abx-plugin-singlefile" },
+    { name = "abx-plugin-sonic-search" },
+    { name = "abx-plugin-sqlitefts-search" },
+    { name = "abx-plugin-title" },
+    { name = "abx-plugin-wget" },
+    { name = "abx-plugin-ytdlp" },
+    { name = "abx-spec-archivebox" },
+    { name = "abx-spec-config" },
+    { name = "abx-spec-django" },
+    { name = "abx-spec-extractor" },
+    { name = "abx-spec-pydantic-pkgr" },
+    { name = "abx-spec-searchbackend" },
     { name = "atomicwrites" },
     { name = "base32-crockford" },
     { name = "channels", extra = ["daphne"] },
@@ -65,13 +652,16 @@ dependencies = [
     { name = "feedparser" },
     { name = "ipython" },
     { name = "mypy-extensions" },
+    { name = "platformdirs" },
     { name = "pluggy" },
+    { name = "pocket" },
     { name = "psutil" },
     { name = "py-machineid" },
     { name = "pydantic-pkgr" },
     { name = "pydantic-settings" },
     { name = "python-benedict", extra = ["io", "parse"] },
     { name = "python-crontab" },
+    { name = "python-statemachine" },
     { name = "requests" },
     { name = "rich" },
     { name = "rich-argparse" },
@@ -79,6 +669,7 @@ dependencies = [
     { name = "sonic-client" },
     { name = "supervisor" },
     { name = "typeid-python" },
+    { name = "typing-extensions" },
     { name = "tzdata" },
     { name = "ulid-py" },
     { name = "w3lib" },
@@ -88,7 +679,19 @@ dependencies = [
 [package.optional-dependencies]
 all = [
     { name = "django-auth-ldap" },
+    { name = "django-autotyping" },
+    { name = "django-debug-toolbar" },
+    { name = "djdt-flamegraph" },
+    { name = "ipdb" },
     { name = "python-ldap" },
+    { name = "requests-tracker" },
+]
+debug = [
+    { name = "django-autotyping" },
+    { name = "django-debug-toolbar" },
+    { name = "djdt-flamegraph" },
+    { name = "ipdb" },
+    { name = "requests-tracker" },
 ]
 ldap = [
     { name = "django-auth-ldap" },
@@ -99,11 +702,9 @@ ldap = [
 dev = [
     { name = "bottle" },
     { name = "bumpver" },
-    { name = "django-autotyping" },
     { name = "django-debug-toolbar" },
     { name = "djdt-flamegraph" },
     { name = "flake8" },
-    { name = "homebrew-pypi-poet" },
     { name = "ipdb" },
     { name = "logfire", extra = ["django"] },
     { name = "mypy" },
@@ -124,7 +725,35 @@ dev = [

 [package.metadata]
 requires-dist = [
-    { name = "archivebox", extras = ["sonic", "ldap"], marker = "extra == 'all'" },
+    { name = "abx", editable = "archivebox/pkgs/abx" },
+    { name = "abx-plugin-archivedotorg", editable = "archivebox/pkgs/abx-plugin-archivedotorg" },
+    { name = "abx-plugin-chrome", editable = "archivebox/pkgs/abx-plugin-chrome" },
+    { name = "abx-plugin-curl", editable = "archivebox/pkgs/abx-plugin-curl" },
+    { name = "abx-plugin-default-binproviders", editable = "archivebox/pkgs/abx-plugin-default-binproviders" },
+    { name = "abx-plugin-favicon", editable = "archivebox/pkgs/abx-plugin-favicon" },
+    { name = "abx-plugin-git", editable = "archivebox/pkgs/abx-plugin-git" },
+    { name = "abx-plugin-htmltotext", editable = "archivebox/pkgs/abx-plugin-htmltotext" },
+    { name = "abx-plugin-ldap-auth", editable = "archivebox/pkgs/abx-plugin-ldap-auth" },
+    { name = "abx-plugin-mercury", editable = "archivebox/pkgs/abx-plugin-mercury" },
+    { name = "abx-plugin-npm", editable = "archivebox/pkgs/abx-plugin-npm" },
+    { name = "abx-plugin-pip", editable = "archivebox/pkgs/abx-plugin-pip" },
+    { name = "abx-plugin-playwright", editable = "archivebox/pkgs/abx-plugin-playwright" },
+    { name = "abx-plugin-puppeteer", editable = "archivebox/pkgs/abx-plugin-puppeteer" },
+    { name = "abx-plugin-readability", editable = "archivebox/pkgs/abx-plugin-readability" },
+    { name = "abx-plugin-ripgrep-search", editable = "archivebox/pkgs/abx-plugin-ripgrep-search" },
+    { name = "abx-plugin-singlefile", editable = "archivebox/pkgs/abx-plugin-singlefile" },
+    { name = "abx-plugin-sonic-search", editable = "archivebox/pkgs/abx-plugin-sonic-search" },
+    { name = "abx-plugin-sqlitefts-search", editable = "archivebox/pkgs/abx-plugin-sqlitefts-search" },
+    { name = "abx-plugin-title", editable = "archivebox/pkgs/abx-plugin-title" },
+    { name = "abx-plugin-wget", editable = "archivebox/pkgs/abx-plugin-wget" },
+    { name = "abx-plugin-ytdlp", editable = "archivebox/pkgs/abx-plugin-ytdlp" },
+    { name = "abx-spec-archivebox", editable = "archivebox/pkgs/abx-spec-archivebox" },
+    { name = "abx-spec-config", editable = "archivebox/pkgs/abx-spec-config" },
+    { name = "abx-spec-django", editable = "archivebox/pkgs/abx-spec-django" },
+    { name = "abx-spec-extractor", editable = "archivebox/pkgs/abx-spec-extractor" },
+    { name = "abx-spec-pydantic-pkgr", editable = "archivebox/pkgs/abx-spec-pydantic-pkgr" },
+    { name = "abx-spec-searchbackend", editable = "archivebox/pkgs/abx-spec-searchbackend" },
+    { name = "archivebox", extras = ["sonic", "ldap", "debug"], marker = "extra == 'all'" },
     { name = "atomicwrites", specifier = "==1.4.1" },
     { name = "base32-crockford", specifier = "==0.3.0" },
     { name = "channels", extras = ["daphne"], specifier = ">=4.1.0" },
@@ -133,7 +762,9 @@ requires-dist = [
     { name = "django", specifier = ">=5.1.1,<6.0" },
     { name = "django-admin-data-views", specifier = ">=0.4.1" },
     { name = "django-auth-ldap", marker = "extra == 'ldap'", specifier = ">=4.1.0" },
+    { name = "django-autotyping", marker = "extra == 'debug'", specifier = ">=0.5.1" },
     { name = "django-charid-field", specifier = ">=0.4" },
+    { name = "django-debug-toolbar", marker = "extra == 'debug'", specifier = ">=4.4.6" },
     { name = "django-extensions", specifier = ">=3.2.3" },
     { name = "django-huey", specifier = ">=1.2.1" },
     { name = "django-huey-monitor", specifier = ">=0.9.0" },
@@ -144,10 +775,14 @@ requires-dist = [
     { name = "django-signal-webhooks", specifier = ">=0.3.0" },
     { name = "django-stubs", specifier = ">=5.0.4" },
     { name = "django-taggit", specifier = "==6.1.0" },
+    { name = "djdt-flamegraph", marker = "extra == 'debug'", specifier = ">=0.2.13" },
     { name = "feedparser", specifier = ">=6.0.11" },
+    { name = "ipdb", marker = "extra == 'debug'", specifier = ">=0.13.13" },
     { name = "ipython", specifier = ">=8.27.0" },
     { name = "mypy-extensions", specifier = ">=1.0.0" },
+    { name = "platformdirs", specifier = ">=4.3.6" },
     { name = "pluggy", specifier = ">=1.5.0" },
+    { name = "pocket", specifier = ">=0.3.6" },
     { name = "psutil", specifier = ">=6.0.0" },
     { name = "py-machineid", specifier = ">=0.6.0" },
     { name = "pydantic-pkgr", specifier = ">=0.5.4" },
@@ -155,13 +790,16 @@ requires-dist = [
     { name = "python-benedict", extras = ["io", "parse"], specifier = ">=0.33.2" },
     { name = "python-crontab", specifier = ">=3.2.0" },
     { name = "python-ldap", marker = "extra == 'ldap'", specifier = ">=3.4.3" },
+    { name = "python-statemachine", specifier = ">=2.3.6" },
     { name = "requests", specifier = ">=2.32.3" },
+    { name = "requests-tracker", marker = "extra == 'debug'", specifier = ">=0.3.3" },
     { name = "rich", specifier = ">=13.8.0" },
     { name = "rich-argparse", specifier = ">=1.5.2" },
     { name = "setuptools", specifier = ">=74.1.0" },
     { name = "sonic-client", specifier = ">=1.0.0" },
     { name = "supervisor", specifier = ">=4.2.5" },
     { name = "typeid-python", specifier = ">=0.3.1" },
+    { name = "typing-extensions", specifier = ">=4.12.2" },
     { name = "tzdata", specifier = ">=2024.2" },
     { name = "ulid-py", specifier = ">=1.1.0" },
     { name = "w3lib", specifier = ">=2.2.1" },
@@ -172,11 +810,9 @@ requires-dist = [
 dev = [
     { name = "bottle", specifier = ">=0.13.1" },
     { name = "bumpver", specifier = ">=2023.1129" },
-    { name = "django-autotyping", specifier = ">=0.5.1" },
     { name = "django-debug-toolbar", specifier = ">=4.4.6" },
     { name = "djdt-flamegraph", specifier = ">=0.2.13" },
     { name = "flake8", specifier = ">=7.1.1" },
-    { name = "homebrew-pypi-poet", specifier = ">=0.10.0" },
     { name = "ipdb", specifier = ">=0.13.13" },
     { name = "logfire", extras = ["django"], specifier = ">=0.51.0" },
     { name = "mypy", specifier = ">=1.11.2" },
@@ -188,10 +824,10 @@ dev = [
     { name = "requests-tracker", specifier = ">=0.3.3" },
     { name = "ruff", specifier = ">=0.6.6" },
     { name = "setuptools", specifier = ">=75.1.0" },
-    { name = "sphinx" },
+    { name = "sphinx", specifier = ">=8.1.3" },
     { name = "sphinx-rtd-theme", specifier = ">=2.0.0" },
-    { name = "uv" },
-    { name = "viztracer" },
+    { name = "uv", specifier = ">=0.4.26" },
+    { name = "viztracer", specifier = ">=0.17.0" },
     { name = "wheel", specifier = ">=0.44.0" },
 ]
@@ -407,25 +1043,25 @@ wheels = [

 [[package]]
 name = "bx-django-utils"
-version = "79"
+version = "81"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "bx-py-utils" },
     { name = "django" },
     { name = "python-stdnum" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/62/8e/d56ffeb8f39c176b03439f614526b0b7de2d298bbf3250d6fdd40521cc51/bx_django_utils-79.tar.gz", hash = "sha256:cb66087d4e9396281acf5a4394b749cff3062b66082d5726f6a8a342fdd35d0e", size = 190245 }
+sdist = { url = "https://files.pythonhosted.org/packages/e7/4a/a4087420852629abd835a17f7d41eca9efa93453c6dcaa29697f40195021/bx_django_utils-81.tar.gz", hash = "sha256:0896f53d737ddda3e98085803e9f469abc4b84561d4062ec13aa40b14e9453b8", size = 192245 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/21/a1/dc24b907e2671512826d3c6593f79e4f78f8fc85544fbbf54102bacc08c9/bx_django_utils-79-py3-none-any.whl", hash = "sha256:d50b10ace24b0b363574542faecf04a81029e2fec6d6e6525fe063ed06238e04", size = 199326 },
+    { url = "https://files.pythonhosted.org/packages/28/8e/692dce1f10303c6f4a03f5c2ae646d36b555c6190f17e11a2a469f9bdc48/bx_django_utils-81-py3-none-any.whl", hash = "sha256:b7ca9a801f0a160fd68c5744b7449552a3029484c373b8aaa2f41d0d50431b51", size = 199480 },
 ]

 [[package]]
 name = "bx-py-utils"
-version = "104"
+version = "105"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/25/9d/d60b1594c40f63b77752a4cdba5ddb746fc61653ba6ea6f79995198087a9/bx_py_utils-104.tar.gz", hash = "sha256:508cfc1d0fa6c22298f697c4efaa913337847d488d8a53eeccfae9ee106123f6", size = 190865 }
+sdist = { url = "https://files.pythonhosted.org/packages/f7/c3/4949fd3031a26eaf7378befacc5a2858d68a4e328b342e2ffc4c321c9a89/bx_py_utils-105.tar.gz", hash = "sha256:1bb7c1401147df35a95ca78c1de9f25d104aeda941a5cc89f9cfc2d1616ddbd7", size = 192317 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2e/da/959a65959ae49ad949ed9e1375df12c8b61f4af041d644a81403daf4f915/bx_py_utils-104-py3-none-any.whl", hash = "sha256:c92ebc4fb122e3e3c228d984d0a1f5c3284c3da6aab1a1c753f7eb1f71bdab3a", size = 175501 },
+    { url = "https://files.pythonhosted.org/packages/6c/e5/da929891157b56f7a9bf825118926910e5e3629eb1cd3ec441d292e7501c/bx_py_utils-105-py3-none-any.whl", hash = "sha256:d441b0e413f8b19b03ab1784187ca2cf2ec5b68d64082790bdbca16a4612cb3e", size = 175660 },
 ]

 [[package]]
@@ -622,15 +1258,15 @@ wheels = [
 [[package]]
 name = "croniter"
-version = "3.0.3"
+version = "5.0.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "python-dateutil" },
     { name = "pytz" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/44/7a/14b0b14ab0203e2c79493cf487829dc294d5c44bedc810ab2f4a97fc9ff4/croniter-3.0.3.tar.gz", hash = "sha256:34117ec1741f10a7bd0ec3ad7d8f0eb8fa457a2feb9be32e6a2250e158957668", size = 53088 }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/8c/0656200bfa5c1e90b26f4bb1cc8aecb4a7722f8386ee044bdc2d4efb589e/croniter-5.0.1.tar.gz", hash = "sha256:7d9b1ef25b10eece48fdf29d8ac52f9b6252abff983ac614ade4f3276294019e", size = 57084 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/93/6a/f2f68e0f9cf702b6d055ab53cab0d8c100f04e86228ca500a8ca9de94b58/croniter-3.0.3-py2.py3-none-any.whl", hash = "sha256:b3bd11f270dc54ccd1f2397b813436015a86d30ffc5a7a9438eec1ed916f2101", size = 22422 },
+    { url = "https://files.pythonhosted.org/packages/3c/68/34c3d74d2af6ea98ff8a0b50d149cff26e88a3f09817121d1186e9185e97/croniter-5.0.1-py2.py3-none-any.whl", hash = "sha256:eb28439742291f6c10b181df1a5ecf421208b1fc62ef44501daec1780a0b09e9", size = 24149 },
 ]
@@ -732,15 +1368,16 @@ wheels = [
 [[package]]
 name = "django-admin-data-views"
-version = "0.4.1"
+version = "0.4.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "django" },
     { name = "django-settings-holder" },
+    { name = "typing-extensions", marker = "python_full_version < '3.11'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/5a/24/6467910537747af96c6c447b221d0e1c36e64547368700f43aecbbfa3097/django_admin_data_views-0.4.1.tar.gz", hash = "sha256:fbdd2d5d0caf3b1cb1ffac57f7caff0e38f02dfc71dfa4e230c8c50f1741bb61", size = 12073 }
+sdist = { url = "https://files.pythonhosted.org/packages/44/4f/3092990fa7ab550f5ab5b14eb8be272c141a7a768c118fcf3bf5f2c1259c/django_admin_data_views-0.4.2.tar.gz", hash = "sha256:d89310eaeae4e441267a27ba51b7cfe70f91d41da96d1cda73c1e8c46ba52d84", size = 12469 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2a/4b/087370e416b91dfce3a67bbc079fe202cdacbf6040e8fecf9bc96a66dbaf/django_admin_data_views-0.4.1-py3-none-any.whl", hash = "sha256:ed4988ce2f1c000bfa0ebef3b0126be1284399e03e23763eeb9d2c499745bf08", size = 15242 },
+    { url = "https://files.pythonhosted.org/packages/32/86/5ab784b6a487cf85c9df03f66dcc7ffa817d9fe603fd44c0ba11bf0da590/django_admin_data_views-0.4.2-py3-none-any.whl", hash = "sha256:e7ebfc822187b53ff20f63b975745d660153f9735ab7d2c607bc5f7b90ff7ec2", size = 15319 },
 ]
@@ -885,16 +1522,16 @@ wheels = [

 [[package]]
 name = "django-settings-holder"
-version = "0.1.2"
+version = "0.2.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/1e/36/0ea7d1e6e782c8c8ec7e8a8f0614526e732e3728dee7778d575f35267e3c/django_settings_holder-0.1.2.tar.gz", hash = "sha256:8ab0f2dabf5a1c79ec9e95e97a296808e0f2c48f6f9aa1da1b77b433ee1e2f9e", size = 6454 }
+sdist = { url = "https://files.pythonhosted.org/packages/76/a2/eca3105add8254158ebc67b605a81aceeefc69238e3eae87ed50c2e2c438/django_settings_holder-0.2.2.tar.gz", hash = "sha256:a894e1a0e2573ff72ed752b97f5c8b03cda2745b64d3baff81db4ebd1e505b03", size = 8606 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/10/96/03b2ed31f267eeaf7d339d1f2ecd95d6ab6cb32a7dca3c3338e90a124c9b/django_settings_holder-0.1.2-py3-none-any.whl", hash = "sha256:7a65f888fc1e8427a807be72d43d5f3f242163e0a0eaf33a393592e6fff3e102", size = 8197 },
+    { url = "https://files.pythonhosted.org/packages/4f/55/250e5b80c785e2ca36f7db3346df0ba38ed63930cf791b2ad926a26dd466/django_settings_holder-0.2.2-py3-none-any.whl", hash = "sha256:37f229d44686dd2dc6f82ff75213c90f633c5fea0492df9c5660c775fa5d6941", size = 8689 },
 ]

 [[package]]
 name = "django-signal-webhooks"
-version = "0.3.0"
+version = "0.3.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "asgiref" },
@@ -902,15 +1539,16 @@ dependencies = [
     { name = "django" },
     { name = "django-settings-holder" },
     { name = "httpx" },
+    { name = "typing-extensions", marker = "python_full_version < '3.11'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/0f/e2/1974349def31611eeb689651d50c744c1a0810d36dc3830a961ea950578e/django_signal_webhooks-0.3.0.tar.gz", hash = "sha256:3efff4305a8c0555a17ce8f4cbb1006014afd7314862647db5724e06eec4493e", size = 16566 }
+sdist = { url = "https://files.pythonhosted.org/packages/41/15/865e72e1da78bc6c6865ff16b0dffb11db62999fc91bed8c3c1668eac4c1/django_signal_webhooks-0.3.1.tar.gz", hash = "sha256:23dc439be2fdea24b746726495eb1a7a59440809056482eebceb153d050a3f5b", size = 17806 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b0/75/63944fa0d6a854ce59158f5a02e4afc4d64dab5a1ddb3f25efe8747fdf10/django_signal_webhooks-0.3.0-py3-none-any.whl", hash = "sha256:64be32ff06c1b74fe80176395258cfb51f1757fed28f026285f38a44d559c00f", size = 22571 },
+    { url = "https://files.pythonhosted.org/packages/eb/7a/0f193eb3351af74de8c3d0fa89f72005caf63ad9456e281e5cd9b2be1a10/django_signal_webhooks-0.3.1-py3-none-any.whl", hash = "sha256:863beb94f6536a09b04d516df6103037748891f5f4555df36796fb54c8649854", size = 22940 },
 ]

 [[package]]
 name = "django-stubs"
-version = "5.1.0"
+version = "5.1.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "asgiref" },
@@ -920,22 +1558,22 @@ dependencies = [
     { name = "types-pyyaml" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/47/35/fa62c35c48e77bc4dabfe56d16786a2e9965ff89c4c55ab909c2d9f00ce8/django_stubs-5.1.0.tar.gz", hash = "sha256:86128c228b65e6c9a85e5dc56eb1c6f41125917dae0e21e6cfecdf1b27e630c5", size = 265839 }
+sdist = { url = "https://files.pythonhosted.org/packages/bf/60/1ae90eb6e2e107bc64a3de9de78a5add7f3b85e491113504eed38d6d2c63/django_stubs-5.1.1.tar.gz", hash = "sha256:126d354bbdff4906c4e93e6361197f6fbfb6231c3df6def85a291dae6f9f577b", size = 265624 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/1c/d8/4561cf32a652f12d1f6edf27ac1ed6194540b44592cc85ead62a1f6fdff6/django_stubs-5.1.0-py3-none-any.whl", hash = "sha256:b98d49a80aa4adf1433a97407102d068de26c739c405431d93faad96dd282c40", size = 470607 },
+    { url = "https://files.pythonhosted.org/packages/98/c8/3081d5f994351248fcd60f9aab10cb2020bdd7df0f14e80854373e15d7d4/django_stubs-5.1.1-py3-none-any.whl", hash = "sha256:c4dc64260bd72e6d32b9e536e8dd0d9247922f0271f82d1d5132a18f24b388ac", size = 470790 },
 ]

 [[package]]
 name = "django-stubs-ext"
-version = "5.1.0"
+version = "5.1.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "django" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/60/a5/dfb120bf3ce1f0da109481605f704ffe72533f056f42e8cffd5a486504a7/django_stubs_ext-5.1.0.tar.gz", hash = "sha256:ed7d51c0b731651879fc75f331fb0806d98b67bfab464e96e2724db6b46ef926", size = 9491 }
+sdist = { url = "https://files.pythonhosted.org/packages/ca/62/a7129909d3c94eac957c02eeb05ac57cbca81db4f3f6270a8503697f376a/django_stubs_ext-5.1.1.tar.gz", hash = "sha256:db7364e4f50ae7e5360993dbd58a3a57ea4b2e7e5bab0fbd525ccdb3e7975d1c", size = 9455 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/eb/03/20a5a19d1b8d26eddd8420261304ee9e6accd802f5332e360daaa2202afb/django_stubs_ext-5.1.0-py3-none-any.whl", hash = "sha256:a455fc222c90b30b29ad8c53319559f5b54a99b4197205ddbb385aede03b395d", size = 8966 },
+    { url = "https://files.pythonhosted.org/packages/6a/ed/f79ae5ad993bdf900d61892d2a9fc0145441a507a7579890fb8e21e4a7bc/django_stubs_ext-5.1.1-py3-none-any.whl", hash = "sha256:3907f99e178c93323e2ce908aef8352adb8c047605161f8d9e5e7b4efb5a6a9c", size = 8965 },
 ]
@@ -970,11 +1608,11 @@ wheels = [
 [[package]]
 name = "et-xmlfile"
-version = "1.1.0"
+version = "2.0.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/3d/5d/0413a31d184a20c763ad741cc7852a659bf15094c24840c5bdd1754765cd/et_xmlfile-1.1.0.tar.gz", hash = "sha256:8eb9e2bc2f8c97e37a2dc85a09ecdcdec9d8a396530a6d5a33b30b9a92da0c5c", size = 3218 }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/96/c2/3dd434b0108730014f1b96fd286040dc3bcb70066346f7e01ec2ac95865f/et_xmlfile-1.1.0-py3-none-any.whl", hash = "sha256:a2ba85d1d6a74ef63837eed693bcb89c3f752169b0e3e7ae5b16ca5e1b3deada", size = 4688 },
+    { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
 ]
@@ -1023,14 +1661,14 @@ wheels = [
 [[package]]
 name = "ftfy"
-version = "6.3.0"
+version = "6.3.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "wcwidth" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/85/c3/63753eca4c5257ce0561cb5f8e9cd0d45d97848c73c56e33a0a764319e5b/ftfy-6.3.0.tar.gz", hash = "sha256:1c7d6418e72b25a7760feb150acf574b86924dbb2e95b32c0b3abbd1ba3d7ad6", size = 362118 }
+sdist = { url = "https://files.pythonhosted.org/packages/a5/d3/8650919bc3c7c6e90ee3fa7fd618bf373cbbe55dff043bd67353dbb20cd8/ftfy-6.3.1.tar.gz", hash = "sha256:9b3c3d90f84fb267fe64d375a07b7f8912d817cf86009ae134aa03e1819506ec", size = 308927 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/76/0f/d8a8152e720cbcad890e56ee98639ff489f1992869b4cf304c3fa24d4bcc/ftfy-6.3.0-py3-none-any.whl", hash = "sha256:17aca296801f44142e3ff2c16f93fbf6a87609ebb3704a9a41dd5d4903396caf", size = 44778 },
+    { url = "https://files.pythonhosted.org/packages/ab/6e/81d47999aebc1b155f81eca4477a616a70f238a2549848c38983f3c22a82/ftfy-6.3.1-py3-none-any.whl", hash = "sha256:7c70eb532015cd2f9adb53f101fb6c7945988d023a085d127d1573dc49dd0083", size = 44821 },
 ]
@@ -1054,19 +1692,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 },
 ]

-[[package]]
-name = "homebrew-pypi-poet"
-version = "0.10.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "jinja2" },
-    { name = "setuptools" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/f7/d9/4b525af3be6ac0a0a962e101b7771db6511d9e96369ded2765406233f9ff/homebrew-pypi-poet-0.10.0.tar.gz", hash = "sha256:e09e997e35a98f66445f9a39ccb33d6d93c5cd090302a59f231707eac0bf378e", size = 5953 }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/3b/85/998232eae0b5c6798c7140ef37d2c1be02ea06cd38dd80169b3abd63b600/homebrew_pypi_poet-0.10.0-py2.py3-none-any.whl", hash = "sha256:65824f97aea0e713c4ac18aa2ef4477aca69426554eac842eeaaddf97df3fc47", size = 7813 },
-]
-
 [[package]]
 name = "httpcore"
 version = "1.0.6"
@@ -1182,7 +1807,7 @@ wheels = [

 [[package]]
 name = "ipython"
-version = "8.28.0"
+version = "8.29.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "colorama", marker = "sys_platform == 'win32'" },
@@ -1197,9 +1822,9 @@ dependencies = [
     { name = "traitlets" },
     { name = "typing-extensions", marker = "python_full_version < '3.12'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f7/21/48db7d9dd622b9692575004c7c98f85f5629428f58596c59606d36c51b58/ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a", size = 5495762 }
+sdist = { url = "https://files.pythonhosted.org/packages/85/e0/a3f36dde97e12121106807d80485423ae4c5b27ce60d40d4ab0bab18a9db/ipython-8.29.0.tar.gz", hash = "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb", size = 5497513 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/f4/3a/5d8680279ada9571de8469220069d27024ee47624af534e537c9ff49a450/ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35", size = 819456 },
+    { url = "https://files.pythonhosted.org/packages/c5/a5/c15ed187f1b3fac445bb42a2dedd8dec1eee1718b35129242049a13a962f/ipython-8.29.0-py3-none-any.whl", hash = "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8", size = 819911 },
 ]
@@ -1272,7 +1897,7 @@ wheels = [

 [[package]]
 name = "logfire"
-version = "1.2.0"
+version = "2.1.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "executing" },
@@ -1284,9 +1909,9 @@ dependencies = [
     { name = "tomli", marker = "python_full_version < '3.11'" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/78/85/ce3e4ecc59a0126eaa9802f7d928d6efa837e63619dfec37654fb2d1f1c1/logfire-1.2.0.tar.gz", hash = "sha256:71866c4ce2f604b307ff0cc1a9b1254ea68b3c46f42bffd6ac36fc4db5abb62b", size = 240418 }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/b8/b4f3a741076a9bdce82ed25218a8167d74c9834588710babc03cb587773a/logfire-2.1.1.tar.gz", hash = "sha256:fd0b9a8b3334cd8c7efb52c04297c2360380818a021e8024ca37bae5f32b78aa", size = 244832 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/7d/7f/37d9c3cbed1ef23b467c0c0039f35524595f8fd79f3acb54e647a0ccd590/logfire-1.2.0-py3-none-any.whl", hash = "sha256:edb2b441e418cf31877bd97e24b3755f873bb423f834cca66f315b25bde61ebd", size = 164724 },
+    { url = "https://files.pythonhosted.org/packages/7e/93/905aef6a938fdd8633cf4937a35ae2438e2830788e8465588e1063ab79b5/logfire-2.1.1-py3-none-any.whl", hash = "sha256:5ead7b0f3edf6cab9bbe9a02e0f6a4c5f3f693411928b32b727ecb3d2b709814", size = 167207 },
 ]

 [package.optional-dependencies]
@@ -1420,36 +2045,36 @@ wheels = [

 [[package]]
 name = "mypy"
-version = "1.12.1"
+version = "1.13.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "mypy-extensions" },
     { name = "tomli", marker = "python_full_version < '3.11'" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/17/03/744330105a74dc004578f47ec27e1bf66b1dd5664ea444d18423e41343bd/mypy-1.12.1.tar.gz", hash = "sha256:f5b3936f7a6d0e8280c9bdef94c7ce4847f5cdfc258fbb2c29a8c1711e8bb96d", size = 3150767 }
+sdist = { url = "https://files.pythonhosted.org/packages/e8/21/7e9e523537991d145ab8a0a2fd98548d67646dc2aaaf6091c31ad883e7c1/mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e", size = 3152532 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/16/90/3a83d3bcff2eb85151723f116336bd545995b5260a49d3e0d95213fcc2d7/mypy-1.12.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3d7d4371829184e22fda4015278fbfdef0327a4b955a483012bd2d423a788801", size = 11017908 },
-    { url = "https://files.pythonhosted.org/packages/e4/5c/d6b32ddde2460fc63168ca0f7bf44f38474353547f7c0304a30023c40aa0/mypy-1.12.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f59f1dfbf497d473201356966e353ef09d4daec48caeacc0254db8ef633a28a5", size = 10184164 },
-    { url = "https://files.pythonhosted.org/packages/42/5e/680aa37c938e6db23bd7e6dd4d38d7e609998491721e453b32ec10d31e7f/mypy-1.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b947097fae68004b8328c55161ac9db7d3566abfef72d9d41b47a021c2fba6b1", size = 12587852 },
-    { url = "https://files.pythonhosted.org/packages/9e/0f/9cafea1c3aaf852cfa1d4a387f33923b6d9714b5c16eb0469da67c5c31e4/mypy-1.12.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:96af62050971c5241afb4701c15189ea9507db89ad07794a4ee7b4e092dc0627", size = 13106489 },
-    { url = "https://files.pythonhosted.org/packages/ea/c3/7f56d5d87a81e665de8dfa424120ab3a6954ae5854946cec0a46f78f6168/mypy-1.12.1-cp310-cp310-win_amd64.whl", hash = "sha256:d90da248f4c2dba6c44ddcfea94bb361e491962f05f41990ff24dbd09969ce20", size = 9634753 },
-    { url = "https://files.pythonhosted.org/packages/18/0a/70de7c97a86cb85535077ab5cef1cbc4e2812fd2e9cc21d78eb561a6b80f/mypy-1.12.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1230048fec1380faf240be6385e709c8570604d2d27ec6ca7e573e3bc09c3735", size = 10940998 },
-    { url = "https://files.pythonhosted.org/packages/c0/97/9ed6d4834d7549936ab88533b302184fb568a0940c4000d2aaee6dc07112/mypy-1.12.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:02dcfe270c6ea13338210908f8cadc8d31af0f04cee8ca996438fe6a97b4ec66", size = 10108523 },
-    { url = "https://files.pythonhosted.org/packages/48/41/1686f37d09c915dfc5b683e20cc99dabac199900b5ca6d22747b99ddcb50/mypy-1.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5a437c9102a6a252d9e3a63edc191a3aed5f2fcb786d614722ee3f4472e33f6", size = 12505553 },
-    { url = "https://files.pythonhosted.org/packages/8d/2b/2dbcaa7e97b23f27ced77493256ee878f4a140ac750e198630ff1b9b60c6/mypy-1.12.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:186e0c8346efc027ee1f9acf5ca734425fc4f7dc2b60144f0fbe27cc19dc7931", size = 12988634 },
-    { url = "https://files.pythonhosted.org/packages/54/55/710d082e91a2ccaea21214229b11f9215a9d22446f949491b5457655e82b/mypy-1.12.1-cp311-cp311-win_amd64.whl", hash = "sha256:673ba1140a478b50e6d265c03391702fa11a5c5aff3f54d69a62a48da32cb811", size = 9630747 },
-    { url = "https://files.pythonhosted.org/packages/8a/74/b9e0e4f06e951e277058f878302faa154d282ca11274c59fe08353f52949/mypy-1.12.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9fb83a7be97c498176fb7486cafbb81decccaef1ac339d837c377b0ce3743a7f", size = 11079902 },
-    { url = "https://files.pythonhosted.org/packages/9f/62/fcad290769db3eb0de265094cef5c94d6075c70bc1e42b67eee4ca192dcc/mypy-1.12.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:389e307e333879c571029d5b93932cf838b811d3f5395ed1ad05086b52148fb0", size = 10072373 },
-    { url = "https://files.pythonhosted.org/packages/cb/27/9ac78349c2952e4446288ec1174675ab9e0160ed18c2cb1154fa456c54e8/mypy-1.12.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:94b2048a95a21f7a9ebc9fbd075a4fcd310410d078aa0228dbbad7f71335e042", size = 12589779 },
-    { url = "https://files.pythonhosted.org/packages/7c/4a/58cebd122cf1cba95680ac51303fbeb508392413ca64e3e711aa7d4877aa/mypy-1.12.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ee5932370ccf7ebf83f79d1c157a5929d7ea36313027b0d70a488493dc1b179", size = 13044459 },
-    { url = "https://files.pythonhosted.org/packages/5b/c7/672935e2a3f9bcc07b1b870395a653f665657bef3cdaa504ad99f56eadf0/mypy-1.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:19bf51f87a295e7ab2894f1d8167622b063492d754e69c3c2fed6563268cb42a", size = 9731919 },
-    { url = "https://files.pythonhosted.org/packages/bb/b0/092be5094840a401940c95224f63bb2a8f09bce9251ac1df180ec523830c/mypy-1.12.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d34167d43613ffb1d6c6cdc0cc043bb106cac0aa5d6a4171f77ab92a3c758bcc", size = 11068611 },
-    { url = "https://files.pythonhosted.org/packages/9a/86/f20f53b8f062876c39602243d7a59b5cabd6b24315d8de511d607fa4de6a/mypy-1.12.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:427878aa54f2e2c5d8db31fa9010c599ed9f994b3b49e64ae9cd9990c40bd635", size = 10068036 },
-    { url = "https://files.pythonhosted.org/packages/84/c7/1dbd6575785522da1d4c1ac2c419505fcf23bee74811880cac447a4a77ab/mypy-1.12.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5fcde63ea2c9f69d6be859a1e6dd35955e87fa81de95bc240143cf00de1f7f81", size = 12585671 },
-    { url = "https://files.pythonhosted.org/packages/46/8a/f6ae18b446eb2bccce54c4bd94065bcfe417d6c67021dcc032bf1e720aff/mypy-1.12.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d54d840f6c052929f4a3d2aab2066af0f45a020b085fe0e40d4583db52aab4e4", size = 13036083 },
-    { url = "https://files.pythonhosted.org/packages/59/e6/fc65fde3dc7156fce8d49ba21c7b1f5d866ad50467bf196ca94a7f6d2c9e/mypy-1.12.1-cp313-cp313-win_amd64.whl", hash = "sha256:20db6eb1ca3d1de8ece00033b12f793f1ea9da767334b7e8c626a4872090cf02", size = 9735467 },
-    { url = "https://files.pythonhosted.org/packages/84/6b/1db9de4e0764778251fb2d64cb7455cf6db75dc99c9f72c8b7e74b6a8a17/mypy-1.12.1-py3-none-any.whl", hash = "sha256:ce561a09e3bb9863ab77edf29ae3a50e65685ad74bba1431278185b7e5d5486e", size = 2646060 },
+    { url = "https://files.pythonhosted.org/packages/5e/8c/206de95a27722b5b5a8c85ba3100467bd86299d92a4f71c6b9aa448bfa2f/mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a", size = 11020731 },
+    { url = "https://files.pythonhosted.org/packages/ab/bb/b31695a29eea76b1569fd28b4ab141a1adc9842edde080d1e8e1776862c7/mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80", size = 10184276 },
+    { url = "https://files.pythonhosted.org/packages/a5/2d/4a23849729bb27934a0e079c9c1aad912167d875c7b070382a408d459651/mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7", size = 12587706 },
+    { url = "https://files.pythonhosted.org/packages/5c/c3/d318e38ada50255e22e23353a469c791379825240e71b0ad03e76ca07ae6/mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f", size = 13105586 },
+    { url = "https://files.pythonhosted.org/packages/4a/25/3918bc64952370c3dbdbd8c82c363804678127815febd2925b7273d9482c/mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372", size = 9632318 },
+    { url = "https://files.pythonhosted.org/packages/d0/19/de0822609e5b93d02579075248c7aa6ceaddcea92f00bf4ea8e4c22e3598/mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d", size = 10939027 },
+    { url = "https://files.pythonhosted.org/packages/c8/71/6950fcc6ca84179137e4cbf7cf41e6b68b4a339a1f5d3e954f8c34e02d66/mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d", size = 10108699 },
+    { url = "https://files.pythonhosted.org/packages/26/50/29d3e7dd166e74dc13d46050b23f7d6d7533acf48f5217663a3719db024e/mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b", size = 12506263 },
+    { url = "https://files.pythonhosted.org/packages/3f/1d/676e76f07f7d5ddcd4227af3938a9c9640f293b7d8a44dd4ff41d4db25c1/mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73", size = 12984688 },
+    { url = "https://files.pythonhosted.org/packages/9c/03/5a85a30ae5407b1d28fab51bd3e2103e52ad0918d1e68f02a7778669a307/mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca", size = 9626811 },
+    { url = "https://files.pythonhosted.org/packages/fb/31/c526a7bd2e5c710ae47717c7a5f53f616db6d9097caf48ad650581e81748/mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5", size = 11077900 },
+    { url = "https://files.pythonhosted.org/packages/83/67/b7419c6b503679d10bd26fc67529bc6a1f7a5f220bbb9f292dc10d33352f/mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e", size = 10074818 },
+    { url = "https://files.pythonhosted.org/packages/ba/07/37d67048786ae84e6612575e173d713c9a05d0ae495dde1e68d972207d98/mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2", size = 12589275 },
+    { url = "https://files.pythonhosted.org/packages/1f/17/b1018c6bb3e9f1ce3956722b3bf91bff86c1cefccca71cec05eae49d6d41/mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0", size = 13037783 },
+    { url = "https://files.pythonhosted.org/packages/cb/32/cd540755579e54a88099aee0287086d996f5a24281a673f78a0e14dba150/mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2", size = 9726197 },
+    { url = "https://files.pythonhosted.org/packages/11/bb/ab4cfdc562cad80418f077d8be9b4491ee4fb257440da951b85cbb0a639e/mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7", size = 11069721 },
+    { url = "https://files.pythonhosted.org/packages/59/3b/a393b1607cb749ea2c621def5ba8c58308ff05e30d9dbdc7c15028bca111/mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62", size = 10063996 },
+    { url = "https://files.pythonhosted.org/packages/d1/1f/6b76be289a5a521bb1caedc1f08e76ff17ab59061007f201a8a18cc514d1/mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8", size = 12584043 },
+    { url = "https://files.pythonhosted.org/packages/a6/83/5a85c9a5976c6f96e3a5a7591aa28b4a6ca3a07e9e5ba0cec090c8b596d6/mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7", size = 13036996 },
+    { url = "https://files.pythonhosted.org/packages/b4/59/c39a6f752f1f893fccbcf1bdd2aca67c79c842402b5283563d006a67cf76/mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc", size = 9737709 },
+    { url = "https://files.pythonhosted.org/packages/3b/86/72ce7f57431d87a7ff17d442f521146a6585019eb8f4f31b7c02801f78ad/mypy-1.13.0-py3-none-any.whl", hash = "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a", size = 2647043 },
 ]
@@ -1679,20 +2304,20 @@ wheels = [

 [[package]]
 name = "phonenumbers"
-version = "8.13.47"
+version = "8.13.48"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/ae/0c/8f315d5e6ddea2e45ae13ada6936df6240858929881daf20cb3133fdb729/phonenumbers-8.13.47.tar.gz", hash = "sha256:53c5e7c6d431cafe4efdd44956078404ae9bc8b0eacc47be3105d3ccc88aaffa", size = 2297081 }
+sdist = { url = "https://files.pythonhosted.org/packages/61/59/d01506a791481d26a640acb0a1124e3f0a816b0711e563962d7d55184890/phonenumbers-8.13.48.tar.gz", hash = "sha256:62d8df9b0f3c3c41571c6b396f044ddd999d61631534001b8be7fdf7ba1b18f3", size = 2297098 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b6/0b/5cde445764ac72460748107e999b026b7245e3fcc5fd5551cc5aff45e469/phonenumbers-8.13.47-py2.py3-none-any.whl", hash = "sha256:5d3c0142ef7055ca5551884352e3b6b93bfe002a0bc95b8eaba39b0e2184541b", size = 2582530 },
+    { url = "https://files.pythonhosted.org/packages/98/f4/a9340f98335ae6fab1ad4b56b6a04f390de65bea371c71b0cdf67e4c08d0/phonenumbers-8.13.48-py2.py3-none-any.whl", hash = "sha256:5c51939acefa390eb74119750afb10a85d3c628dc83fd62c52d6f532fcf5d205", size = 2582542 },
 ]

 [[package]]
 name = "pip"
-version = "24.2"
+version = "24.3.1"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/4d/87/fb90046e096a03aeab235e139436b3fe804cdd447ed2093b0d70eba3f7f8/pip-24.2.tar.gz", hash = "sha256:5b5e490b5e9cb275c879595064adce9ebd31b854e3e803740b72f9ccf34a45b8", size = 1922041 }
+sdist = { url = "https://files.pythonhosted.org/packages/f4/b1/b422acd212ad7eedddaf7981eee6e5de085154ff726459cf2da7c5a184c1/pip-24.3.1.tar.gz", hash = "sha256:ebcb60557f2aefabc2e0f918751cd24ea0d56d8ec5445fe1807f1d2109660b99", size = 1931073 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d4/55/90db48d85f7689ec6f81c0db0622d704306c5284850383c090e6c7195a5c/pip-24.2-py3-none-any.whl", hash = "sha256:2cd581cf58ab7fcfca4ce8efa6dcacd0de5bf8d0a3eb9ec927e07405f4d9e2a2", size = 1815170 },
+    { url = "https://files.pythonhosted.org/packages/ef/7d/500c9ad20238fcfcb4cb9243eede163594d7020ce87bd9610c9e02771876/pip-24.3.1-py3-none-any.whl", hash = "sha256:3790624780082365f47549d032f3770eeb2b1e8bd1f7b2e02dace1afa361b4ed", size = 1822182 },
 ]
@@ -1713,6 +2338,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 },
 ]
+[[package]]
+name = "pocket"
+version = "0.3.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/b6/cd79a0e237e733e2f8a196f4e9f4d30d99c769b809c5fbbea9e34400655d/pocket-0.3.6.tar.gz", hash = "sha256:907bf16a19fae9c2080f799d979de4c8daa36d6d28e86ceb9fc17d6f0bdb89b9", size = 3749 }
+
 [[package]]
 name = "prompt-toolkit"
 version = "3.0.48"
@@ -1945,15 +2579,15 @@ wheels = [

 [[package]]
 name = "pydantic-settings"
-version = "2.6.0"
+version = "2.6.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "pydantic" },
     { name = "python-dotenv" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/6c/66/5f1a9da10675bfb3b9da52f5b689c77e0a5612263fcce510cfac3e99a168/pydantic_settings-2.6.0.tar.gz", hash = "sha256:44a1804abffac9e6a30372bb45f6cafab945ef5af25e66b1c634c01dd39e0188", size = 75232 }
+sdist = { url = "https://files.pythonhosted.org/packages/b5/d4/9dfbe238f45ad8b168f5c96ee49a3df0598ce18a0795a983b419949ce65b/pydantic_settings-2.6.1.tar.gz", hash = "sha256:e0f92546d8a9923cb8941689abf85d6601a8c19a23e97a34b2964a2e3f813ca0", size = 75646 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/34/19/26bb6bdb9fdad5f0dfce538780814084fb667b4bc37fcb28459c14b8d3b5/pydantic_settings-2.6.0-py3-none-any.whl", hash = "sha256:4a819166f119b74d7f8c765196b165f95cc7487ce58ea27dec8a5a26be0970e0", size = 28578 },
+    { url = "https://files.pythonhosted.org/packages/5e/f9/ff95fd7d760af42f647ea87f9b8a383d891cdb5e5dbd4613edaeb094252a/pydantic_settings-2.6.1-py3-none-any.whl", hash = "sha256:7fb0637c786a558d3103436278a7c4f1cfd29ba8973238a50c5bb9a55387da87", size = 28595 },
 ]
@@ -2097,6 +2731,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a4/62/02da182e544a51a5c3ccf4b03ab79df279f9c60c5e82d5e8bec7ca26ac11/python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8", size = 10051 },
 ]
+[[package]]
+name = "python-statemachine"
+version = "2.3.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/20/c9/7034a362ce151f9fa0ead5630727a16122f7a5ed235d42447910dff95b6a/python_statemachine-2.3.6.tar.gz", hash = "sha256:9cb4040ca7f2158d3cd46f36a77b420b6ef95a90223928a7f3cab232a70bd560", size = 36735 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/39/04/19a44b549cbaae1ac6c2acc58afb96b71209da866713877f40aab2f45de6/python_statemachine-2.3.6-py3-none-any.whl", hash = "sha256:0001b02cbe2f5b2420c423b5b3e3a33915447ac6d9735219c929e2378d454f5f", size = 41529 },
+]
+
 [[package]]
 name = "python-stdnum"
 version = "1.20"
@@ -2272,58 +2915,58 @@ wheels = [

 [[package]]
 name = "rich"
-version = "13.9.2"
+version = "13.9.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "markdown-it-py" },
     { name = "pygments" },
     { name = "typing-extensions", marker = "python_full_version < '3.11'" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/aa/9e/1784d15b057b0075e5136445aaea92d23955aad2c93eaede673718a40d95/rich-13.9.2.tar.gz", hash = "sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c", size = 222843 }
+sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/67/91/5474b84e505a6ccc295b2d322d90ff6aa0746745717839ee0c5fb4fdcceb/rich-13.9.2-py3-none-any.whl", hash = "sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1", size = 242117 },
+    { url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424 },
 ]

 [[package]]
 name = "rich-argparse"
-version = "1.5.2"
+version = "1.6.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "rich" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/26/f1/0a5ba24d684012e2f25deec88d9a9a1199d8e26e3bb595b812c8b0218cff/rich_argparse-1.5.2.tar.gz", hash = "sha256:84d348d5b6dafe99fffe2c7ea1ca0afe14096c921693445b9eee65ee4fcbfd2c", size = 17142 }
+sdist = { url = "https://files.pythonhosted.org/packages/7f/ee/c410251ff6123d4417f2fe8e72c8628f187682b70ce34134a2a3e307a2d5/rich_argparse-1.6.0.tar.gz", hash = "sha256:092083c30da186f25bcdff8b1d47fdfb571288510fb051e0488a72cc3128de13", size = 17499 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/77/83/4585bd18f0cda471ce44b8364620dc9cbb7ce7179b923123ad3feddf99da/rich_argparse-1.5.2-py3-none-any.whl", hash = "sha256:7027503d5849e27fc7cc85fb58504363606f2ec1c8b3c27d9a8ad28788faf877", size = 19777 },
+    { url = "https://files.pythonhosted.org/packages/25/45/54b95bb72bb17c27a7252bee5034955020b5869a33918b660ffc29cbf608/rich_argparse-1.6.0-py3-none-any.whl", hash = "sha256:fbe70a1d821b3f2fa8958cddf0cae131870a6e9faa04ab52b409cb1eda809bd7", size = 20072 },
 ]

 [[package]]
 name = "ruff"
-version = "0.7.0"
+version = "0.7.2"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/2c/c7/f3367d1da5d568192968c5c9e7f3d51fb317b9ac04828493b23d8fce8ce6/ruff-0.7.0.tar.gz", hash = "sha256:47a86360cf62d9cd53ebfb0b5eb0e882193fc191c6d717e8bef4462bc3b9ea2b", size = 3146645 }
+sdist = { url = "https://files.pythonhosted.org/packages/95/51/231bb3790e5b0b9fd4131f9a231d73d061b3667522e3f406fd9b63334d0e/ruff-0.7.2.tar.gz", hash = "sha256:2b14e77293380e475b4e3a7a368e14549288ed2931fce259a6f99978669e844f", size = 3210036 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/48/59/a0275a0913f3539498d116046dd679cd657fe3b7caf5afe1733319414932/ruff-0.7.0-py3-none-linux_armv6l.whl", hash = "sha256:0cdf20c2b6ff98e37df47b2b0bd3a34aaa155f59a11182c1303cce79be715628", size = 10434007 },
-    { url = "https://files.pythonhosted.org/packages/cd/94/da0ba5f956d04c90dd899209904210600009dcda039ce840d83eb4298c7d/ruff-0.7.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:496494d350c7fdeb36ca4ef1c9f21d80d182423718782222c29b3e72b3512737", size = 10048066 },
-    { url = "https://files.pythonhosted.org/packages/57/1d/e5cc149ecc46e4f203403a79ccd170fad52d316f98b87d0f63b1945567db/ruff-0.7.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:214b88498684e20b6b2b8852c01d50f0651f3cc6118dfa113b4def9f14faaf06", size = 9711389 },
-    { url = "https://files.pythonhosted.org/packages/05/67/fb7ea2c869c539725a16c5bc294e9aa34f8b1b6fe702f1d173a5da517c2b/ruff-0.7.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:630fce3fefe9844e91ea5bbf7ceadab4f9981f42b704fae011bb8efcaf5d84be", size = 10755174 },
-    { url = "https://files.pythonhosted.org/packages/5f/f0/13703bc50536a0613ea3dce991116e5f0917a1f05528c6ab738b33c08d3f/ruff-0.7.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:211d877674e9373d4bb0f1c80f97a0201c61bcd1e9d045b6e9726adc42c156aa", size = 10196040 },
-    { url = "https://files.pythonhosted.org/packages/99/c1/77b04ab20324ab03d333522ee55fb0f1c38e3ca0d326b4905f82ce6b6c70/ruff-0.7.0-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:194d6c46c98c73949a106425ed40a576f52291c12bc21399eb8f13a0f7073495", size = 11033684 },
-    { url = "https://files.pythonhosted.org/packages/f2/97/f463334dc4efeea3551cd109163df15561c18a1c3ec13d51643740fd36ba/ruff-0.7.0-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:82c2579b82b9973a110fab281860403b397c08c403de92de19568f32f7178598", size = 11803700 },
-    { url = "https://files.pythonhosted.org/packages/b4/f8/a31d40c4bb92933d376a53e7c5d0245d9b27841357e4820e96d38f54b480/ruff-0.7.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9af971fe85dcd5eaed8f585ddbc6bdbe8c217fb8fcf510ea6bca5bdfff56040e", size = 11347848 },
-    { url = "https://files.pythonhosted.org/packages/83/62/0c133b35ddaf91c65c30a56718b80bdef36bfffc35684d29e3a4878e0ea3/ruff-0.7.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b641c7f16939b7d24b7bfc0be4102c56562a18281f84f635604e8a6989948914", size = 12480632 },
-    { url = "https://files.pythonhosted.org/packages/46/96/464058dd1d980014fb5aa0a1254e78799efb3096fc7a4823cd66a1621276/ruff-0.7.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d71672336e46b34e0c90a790afeac8a31954fd42872c1f6adaea1dff76fd44f9", size = 10941919 },
-    { url = "https://files.pythonhosted.org/packages/a0/f7/bda37ec77986a435dde44e1f59374aebf4282a5fa9cf17735315b847141f/ruff-0.7.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ab7d98c7eed355166f367597e513a6c82408df4181a937628dbec79abb2a1fe4", size = 10745519 },
-    { url = "https://files.pythonhosted.org/packages/c2/33/5f77fc317027c057b61a848020a47442a1cbf12e592df0e41e21f4d0f3bd/ruff-0.7.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:1eb54986f770f49edb14f71d33312d79e00e629a57387382200b1ef12d6a4ef9", size = 10284872 },
-    { url = "https://files.pythonhosted.org/packages/ff/50/98aec292bc9537f640b8d031c55f3414bf15b6ed13b3e943fed75ac927b9/ruff-0.7.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:dc452ba6f2bb9cf8726a84aa877061a2462afe9ae0ea1d411c53d226661c601d", size = 10600334 },
-    { url = "https://files.pythonhosted.org/packages/f2/85/12607ae3201423a179b8cfadc7cb1e57d02cd0135e45bd0445acb4cef327/ruff-0.7.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:4b406c2dce5be9bad59f2de26139a86017a517e6bcd2688da515481c05a2cb11", size = 11017333 },
-    { url = "https://files.pythonhosted.org/packages/d4/7f/3b85a56879e705d5f46ec14daf8a439fca05c3081720fe3dc3209100922d/ruff-0.7.0-py3-none-win32.whl", hash = "sha256:f6c968509f767776f524a8430426539587d5ec5c662f6addb6aa25bc2e8195ec", size = 8570962 },
-    { url = "https://files.pythonhosted.org/packages/39/9f/c5ee2b40d377354dabcc23cff47eb299de4b4d06d345068f8f8cc1eadac8/ruff-0.7.0-py3-none-win_amd64.whl", hash = "sha256:ff4aabfbaaba880e85d394603b9e75d32b0693152e16fa659a3064a85df7fce2", size = 9365544 },
-    { url = "https://files.pythonhosted.org/packages/89/8b/ee1509f60148cecba644aa718f6633216784302458340311898aaf0b1bed/ruff-0.7.0-py3-none-win_arm64.whl", hash = "sha256:10842f69c245e78d6adec7e1db0a7d9ddc2fff0621d730e61657b64fa36f207e", size = 8695763 },
+    { url = "https://files.pythonhosted.org/packages/5c/56/0caa2b5745d66a39aa239c01059f6918fc76ed8380033d2f44bf297d141d/ruff-0.7.2-py3-none-linux_armv6l.whl", hash = "sha256:b73f873b5f52092e63ed540adefc3c36f1f803790ecf2590e1df8bf0a9f72cb8", size = 10373973 },
+    { url = "https://files.pythonhosted.org/packages/1a/33/cad6ff306731f335d481c50caa155b69a286d5b388e87ff234cd2a4b3557/ruff-0.7.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5b813ef26db1015953daf476202585512afd6a6862a02cde63f3bafb53d0b2d4", size = 10171140 },
+    { url = "https://files.pythonhosted.org/packages/97/f5/6a2ca5c9ba416226eac9cf8121a1baa6f06655431937e85f38ffcb9d0d01/ruff-0.7.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:853277dbd9675810c6826dad7a428d52a11760744508340e66bf46f8be9701d9", size = 9809333 },
+    { url = "https://files.pythonhosted.org/packages/16/83/e3e87f13d1a1dc205713632978cd7bc287a59b08bc95780dbe359b9aefcb/ruff-0.7.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21aae53ab1490a52bf4e3bf520c10ce120987b047c494cacf4edad0ba0888da2", size = 10622987 },
+    { url = "https://files.pythonhosted.org/packages/22/16/97ccab194480e99a2e3c77ae132b3eebfa38c2112747570c403a4a13ba3a/ruff-0.7.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ccc7e0fc6e0cb3168443eeadb6445285abaae75142ee22b2b72c27d790ab60ba", size = 10184640 },
+    { url = "https://files.pythonhosted.org/packages/97/1b/82ff05441b036f68817296c14f24da47c591cb27acfda473ee571a5651ac/ruff-0.7.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fd77877a4e43b3a98e5ef4715ba3862105e299af0c48942cc6d51ba3d97dc859", size = 11210203 },
+    { url = "https://files.pythonhosted.org/packages/a6/96/7ecb30a7ef7f942e2d8e0287ad4c1957dddc6c5097af4978c27cfc334f97/ruff-0.7.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:e00163fb897d35523c70d71a46fbaa43bf7bf9af0f4534c53ea5b96b2e03397b", size = 11870894 },
+    { url = "https://files.pythonhosted.org/packages/06/6a/c716bb126218227f8e604a9c484836257708a05ee3d2ebceb666ff3d3867/ruff-0.7.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3c54b538633482dc342e9b634d91168fe8cc56b30a4b4f99287f4e339103e88", size = 11449533 },
+    { url = "https://files.pythonhosted.org/packages/e6/2f/3a5f9f9478904e5ae9506ea699109070ead1e79aac041e872cbaad8a7458/ruff-0.7.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b792468e9804a204be221b14257566669d1db5c00d6bb335996e5cd7004ba80", size = 12607919 },
+    { url = "https://files.pythonhosted.org/packages/a0/57/4642e57484d80d274750dcc872ea66655bbd7e66e986fede31e1865b463d/ruff-0.7.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dba53ed84ac19ae4bfb4ea4bf0172550a2285fa27fbb13e3746f04c80f7fa088", size = 11016915 },
+    { url = "https://files.pythonhosted.org/packages/4d/6d/59be6680abee34c22296ae3f46b2a3b91662b8b18ab0bf388b5eb1355c97/ruff-0.7.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b19fafe261bf741bca2764c14cbb4ee1819b67adb63ebc2db6401dcd652e3748", size = 10625424 },
+    { url = "https://files.pythonhosted.org/packages/82/e7/f6a643683354c9bc7879d2f228ee0324fea66d253de49273a0814fba1927/ruff-0.7.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:28bd8220f4d8f79d590db9e2f6a0674f75ddbc3847277dd44ac1f8d30684b828", size = 10233692 },
+    { url = "https://files.pythonhosted.org/packages/d7/48/b4e02fc835cd7ed1ee7318d9c53e48bcf6b66301f55925a7dcb920e45532/ruff-0.7.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9fd67094e77efbea932e62b5d2483006154794040abb3a5072e659096415ae1e", size = 10751825 },
+    { url = "https://files.pythonhosted.org/packages/1e/06/6c5ee6ab7bb4cbad9e8bb9b2dd0d818c759c90c1c9e057c6ed70334b97f4/ruff-0.7.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:576305393998b7bd6c46018f8104ea3a9cb3fa7908c21d8580e3274a3b04b691", size = 11074811 },
+    { url = "https://files.pythonhosted.org/packages/a1/16/8969304f25bcd0e4af1778342e63b715e91db8a2dbb51807acd858cba915/ruff-0.7.2-py3-none-win32.whl", hash = "sha256:fa993cfc9f0ff11187e82de874dfc3611df80852540331bc85c75809c93253a8", size = 8650268 },
+    { url = "https://files.pythonhosted.org/packages/d9/18/c4b00d161def43fe5968e959039c8f6ce60dca762cec4a34e4e83a4210a0/ruff-0.7.2-py3-none-win_amd64.whl", hash = "sha256:dd8800cbe0254e06b8fec585e97554047fb82c894973f7ff18558eee33d1cb88", size = 9433693 },
+    { url = "https://files.pythonhosted.org/packages/7f/7b/c920673ac01c19814dd15fc617c02301c522f3d6812ca2024f4588ed4549/ruff-0.7.2-py3-none-win_arm64.whl", hash = "sha256:bb8368cd45bba3f57bb29cbb8d64b4a33f8415d0149d2655c5c8539452ce7760", size = 8735845 },
 ]

 [[package]]
 name = "service-identity"
-version = "24.1.0"
+version = "24.2.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "attrs" },
@@ -2331,18 +2974,18 @@ dependencies = [
     { name = "pyasn1" },
     { name = "pyasn1-modules" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/38/d2/2ac20fd05f1b6fce31986536da4caeac51ed2e1bb25d4a7d73ca4eccdfab/service_identity-24.1.0.tar.gz", hash = "sha256:6829c9d62fb832c2e1c435629b0a8c476e1929881f28bee4d20bc24161009221", size = 40183 }
+sdist = { url = "https://files.pythonhosted.org/packages/07/a5/dfc752b979067947261dbbf2543470c58efe735c3c1301dd870ef27830ee/service_identity-24.2.0.tar.gz", hash = "sha256:b8683ba13f0d39c6cd5d625d2c5f65421d6d707b013b375c355751557cbe8e09", size = 39245 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3b/92/44669afe6354a7bed9968013862118c401690d8b5a805bab75ac1764845f/service_identity-24.1.0-py3-none-any.whl", hash = "sha256:a28caf8130c8a5c1c7a6f5293faaf239bbfb7751e4862436920ee6f2616f568a", size = 12037 },
+    { url = "https://files.pythonhosted.org/packages/08/2c/ca6dd598b384bc1ce581e24aaae0f2bed4ccac57749d5c3befbb5e742081/service_identity-24.2.0-py3-none-any.whl", hash = "sha256:6b047fbd8a84fd0bb0d55ebce4031e400562b9196e1e0d3e0fe2b8a59f6d4a85", size = 11364 },
 ]

 [[package]]
 name = "setuptools"
-version = "75.2.0"
+version = "75.3.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/07/37/b31be7e4b9f13b59cde9dcaeff112d401d49e0dc5b37ed4a9fc8fb12f409/setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec", size = 1350308 }
+sdist = { url = "https://files.pythonhosted.org/packages/ed/22/a438e0caa4576f8c383fa4d35f1cc01655a46c75be358960d815bfbb12bd/setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686", size = 1351577 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/31/2d/90165d51ecd38f9a02c6832198c13a4e48652485e2ccf863ebb942c531b6/setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8", size = 1249825 },
+    { url = "https://files.pythonhosted.org/packages/90/12/282ee9bce8b58130cb762fbc9beabd531549952cac11fc56add11dcb7ea0/setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd", size = 1251070 },
 ]
@@ -2577,7 +3220,7 @@ wheels = [

 [[package]]
 name = "twisted"
-version = "24.7.0"
+version = "24.10.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "attrs" },
@@ -2588,9 +3231,9 @@ dependencies = [
     { name = "typing-extensions" },
     { name = "zope-interface" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/8b/bf/f30eb89bcd14a21a36b4cd3d96658432d4c590af3c24bbe08ea77fa7bbbb/twisted-24.7.0.tar.gz", hash = "sha256:5a60147f044187a127ec7da96d170d49bcce50c6fd36f594e60f4587eff4d394", size = 3516844 }
+sdist = { url = "https://files.pythonhosted.org/packages/b2/0f/2d0b0dcd52a849db64ff63619aead94ae1091fe4d4d7e100371efe513585/twisted-24.10.0.tar.gz", hash = "sha256:02951299672595fea0f70fa2d5f7b5e3d56836157eda68859a6ad6492d36756e", size = 3525999 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/49/d2/7b3e869b983fbf29d770fc2893f8df7c1739c6ff03a2b926b4fc43e4263e/twisted-24.7.0-py3-none-any.whl", hash = "sha256:734832ef98108136e222b5230075b1079dad8a3fc5637319615619a7725b0c81", size = 3181556 },
+    { url = "https://files.pythonhosted.org/packages/f9/7c/f80f6853d702782edb357190c42c3973f13c547a5f68ab1b17e6415061b8/twisted-24.10.0-py3-none-any.whl", hash = "sha256:67aa7c8aa94387385302acf44ade12967c747858c8bcce0f11d38077a11c5326", size = 3188753 },
 ]

 [package.optional-dependencies]
@@ -2689,27 +3332,27 @@ wheels = [

 [[package]]
 name = "uv"
-version = "0.4.25"
+version = "0.4.29"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/d0/bc/1a013408b7f9f437385705652f404b6b15127ecf108327d13be493bdfb81/uv-0.4.25.tar.gz", hash = "sha256:d39077cdfe3246885fcdf32e7066ae731a166101d063629f9cea08738f79e6a3", size = 2064863 }
+sdist = { url = "https://files.pythonhosted.org/packages/6a/23/6e8d8177112b40d4905a49c03d397c5b93eb030f87cdddf0c5d4be599fc9/uv-0.4.29.tar.gz", hash = "sha256:9c559b6fdc042add463e86afa1c210716f7020bfc2e96b00df5af7afcb587ce7", size = 2102901 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/84/18/9c9056d373620b1cf5182ce9b2d258e86d117d667cf8883e12870f2a5edf/uv-0.4.25-py3-none-linux_armv6l.whl", hash = "sha256:94fb2b454afa6bdfeeea4b4581c878944ca9cf3a13712e6762f245f5fbaaf952", size = 13028246 },
-    { url = "https://files.pythonhosted.org/packages/a1/19/8a3f09aba30ac5433dfecde55d5241a07c96bb12340c3b810bc58188a12e/uv-0.4.25-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:a7c3a18c20ddb527d296d1222bddf42b78031c50b5b4609d426569b5fb61f5b0", size = 13175265 },
-    { url = "https://files.pythonhosted.org/packages/e8/c9/2f924bb29bd53c51b839c1c6126bd2cf4c451d4a7d8f34be078f9e31c57e/uv-0.4.25-py3-none-macosx_11_0_arm64.whl", hash = "sha256:18100f0f36419a154306ed6211e3490bf18384cdf3f1a0950848bf64b62fa251", size = 12255610 },
-    { url = "https://files.pythonhosted.org/packages/b2/5a/d8f8971aeb3389679505cf633a786cd72a96ce232f80f14cfe5a693b4c64/uv-0.4.25-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:6e981b1465e30102e41946adede9cb08051a5d70c6daf09f91a7ea84f0b75c08", size = 12506511 },
-    { url = "https://files.pythonhosted.org/packages/e3/96/8c73520daeba5022cec8749e44afd4ca9ef774bf728af9c258bddec3577f/uv-0.4.25-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:578ae385fad6bd6f3868828e33d54994c716b315b1bc49106ec1f54c640837e4", size = 12836250 },
-    { url = "https://files.pythonhosted.org/packages/67/3d/b0e810d365fb154fe1d380a0f43ee35a683cf9162f2501396d711bec2621/uv-0.4.25-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d29a78f011ecc2f31c13605acb6574c2894c06d258b0f8d0dbb899986800450", size = 13521303 },
-    { url = "https://files.pythonhosted.org/packages/2d/f4/dd3830ec7fc6e7e5237c184f30f2dbfed4f93605e472147eca1373bcc72b/uv-0.4.25-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ec181be2bda10651a3558156409ac481549983e0276d0e3645e3b1464e7f8715", size = 14105308 },
-    { url = "https://files.pythonhosted.org/packages/f4/4e/0fca02f8681e4870beda172552e747e0424f6e9186546b00a5e92525fea9/uv-0.4.25-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:50c7d0d9e7f392f81b13bf3b7e37768d1486f2fc9d533a54982aa0ed11e4db23", size = 13859475 },
-    { url = "https://files.pythonhosted.org/packages/33/07/1100e9bc652f2850930f466869515d16ffe9582aaaaa99bac332ebdfe3ea/uv-0.4.25-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2fc35b5273f1e018aecd66b70e0fd7d2eb6698853dde3e2fc644e7ebf9f825b1", size = 18100840 },
-    { url = "https://files.pythonhosted.org/packages/fa/98/ba1cb7dd2aa639a064a9e49721e08f12a3424456d60dde1327e7c6437930/uv-0.4.25-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7022a71ff63a3838796f40e954b76bf7820fc27e96fe002c537e75ff8e34f1d", size = 13645464 },
-    { url = "https://files.pythonhosted.org/packages/0d/05/b97fb8c828a070e8291826922b2712d1146b11563b4860bc9ba80f5635d1/uv-0.4.25-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:e02afb0f6d4b58718347f7d7cfa5a801e985ce42181ba971ed85ef149f6658ca", size = 12694995 },
-    { url = "https://files.pythonhosted.org/packages/b3/97/63df050811379130202898f60e735a1a331ba3a93b8aa1e9bb466f533913/uv-0.4.25-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:3d7680795ea78cdbabbcce73d039b2651cf1fa635ddc1aa3082660f6d6255c50", size = 12831737 },
-    { url = "https://files.pythonhosted.org/packages/dc/e0/08352dcffa6e8435328861ea60b2c05e8bd030f1e93998443ba66209db7b/uv-0.4.25-py3-none-musllinux_1_1_i686.whl", hash = "sha256:aae9dcafd20d5ba978c8a4939ab942e8e2e155c109e9945207fbbd81d2892c9e", size = 13273529 },
-    { url = "https://files.pythonhosted.org/packages/25/f4/eaf95e5eee4e2e69884df0953d094deae07216f72068ef1df08c0f49841d/uv-0.4.25-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:4c55040e67470f2b73e95e432aba06f103a0b348ea0b9c6689b1029c8d9e89fd", size = 15039860 },
-    { url = "https://files.pythonhosted.org/packages/69/04/482b1cc9e8d599c7d766c4ba2d7a512ed3989921443792f92f26b8d44fe6/uv-0.4.25-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:bdbfd0c476b9e80a3f89af96aed6dd7d2782646311317a9c72614ccce99bb2ad", size = 13776302 },
-    { url = "https://files.pythonhosted.org/packages/cd/7e/3d1cb735cc3df6341ac884b73eeec1f51a29192721be40be8e9b1d82666d/uv-0.4.25-py3-none-win32.whl", hash = "sha256:7d266e02fefef930609328c31c075084295c3cb472bab3f69549fad4fd9d82b3", size = 12970553 },
-    { url = "https://files.pythonhosted.org/packages/04/e9/c00d2bb4a286b13fad0f06488ea9cbe9e76d0efcd81e7a907f72195d5b83/uv-0.4.25-py3-none-win_amd64.whl", hash = "sha256:be2a4fc4fcade9ea5e67e51738c95644360d6e59b6394b74fc579fb617f902f7", size = 14702875 },
+    { url = "https://files.pythonhosted.org/packages/1c/8d/78b6927a3e511a4bc05347714c8917896477537bf09a6301e84de08b7a59/uv-0.4.29-py3-none-linux_armv6l.whl", hash = "sha256:287dc3fd3f78093a5a82136f01cbd9f224e0905b38d3dcffdc96c08fbbe48ee9", size = 13250618 },
+    { url = "https://files.pythonhosted.org/packages/d8/2f/1bbfc3c15933fcf07c222e063044696320f5a9fe3d5c584960ed0c490cf8/uv-0.4.29-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6b03859068aaa08ca9907a51d403d54b0a9d8054091646845a9192f213f099d4", size = 13316211 },
+    { url = "https://files.pythonhosted.org/packages/fb/1a/1c862cc36f29cf58b22758f31eb5f9611ee86429d470c8e4c0fd235592ec/uv-0.4.29-py3-none-macosx_11_0_arm64.whl", hash = "sha256:950bbfe1954e9c3a5d6c4777bb778b4c23d0dea9ad9f77622c45d4fbba433355", size = 12363705 },
+    { url = "https://files.pythonhosted.org/packages/a1/0e/76e947db1135fa2436b11cc1ca927de187601be7ec65b0102f42a6a58211/uv-0.4.29-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:3473b05142ba436ac30d036b7ab5e9bcfa97f63df5d1382f92e0a3e4aaa391bc", size = 12622825 },
+    { url = "https://files.pythonhosted.org/packages/41/3d/b54226b11eb935e4e57585905cf3ded2ac7d972c551bef1c3a000d4c5e47/uv-0.4.29-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7060dfbad0bc26e9cecbb4f8482445c958071511f23728948478f81acfb29048", size = 13054445 },
+    { url = "https://files.pythonhosted.org/packages/bf/00/02fa712a3991957d2a65d043173d06d3a429acb3c4e54976f4385c034d97/uv-0.4.29-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df35d9cbe4cfbb7bce287f56e3bb7a7cef0b7b5173ed889d936d4c470f2b1b83", size = 13655646 },
+    { url = "https://files.pythonhosted.org/packages/61/85/f6796032396bbd350648747c984376c8c8add14c75476ed8d5a3438a9c76/uv-0.4.29-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:cfb797a87b55d96cc0593e9f29ab5d58454be74598ea0158e1b2f4f2dc97cede", size = 14281147 },
+    { url = "https://files.pythonhosted.org/packages/17/48/3314a03c6580d0b05bd1b9122ff9a9fbde5163815cf84f5a220fc013cea1/uv-0.4.29-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:668d3e6095c6f0cac6a831ef4030f7ad79442d1c84b9569f01f50b60c2d51a77", size = 14004714 },
+    { url = "https://files.pythonhosted.org/packages/11/e0/456bc5271f09ff385c57570628705757a59f9a3f8205ff029dc9b2213dbd/uv-0.4.29-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0be21afa0e582ddc5badff6ef40c3c6784efc5feae4ad568307b668d40dc49bd", size = 18032241 },
+    { url = "https://files.pythonhosted.org/packages/ef/6c/db10ff7f178ee93a832941e1cddbf38bfb1b0e30fd07580db10eb909f19d/uv-0.4.29-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6224a322267570e0470c61008fd1c8e2f50bf073b339f4c3010da86aef3c44c", size = 13787528 },
+    { url = "https://files.pythonhosted.org/packages/1b/cf/501cd6aeeae0413e83ed0c112a362e44c05fa01144ecfd05c6fb3533778d/uv-0.4.29-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:24cccff9c248864ba0ab3429bae56314146c9494ce66a881d70ea8cf2805945f", size = 12789635 },
+    { url = "https://files.pythonhosted.org/packages/8d/8d/3103af713c6369b6c1afe2bd8415eb43ea2cd4d11aa823f2e5747736b410/uv-0.4.29-py3-none-musllinux_1_1_armv7l.whl", hash = "sha256:68d4967b5f0af8bd46085e0f3ded229026700668a97734a21c3d11a5fc350c47", size = 13022589 },
+    { url = "https://files.pythonhosted.org/packages/4f/4d/e9a0da7c43301f27503ed0af881afb9059e3700bd374d1c7c6579ff9fb29/uv-0.4.29-py3-none-musllinux_1_1_i686.whl", hash = "sha256:75927da78f74bb935314d236dc61ecdc192e878e06eb79585b6d9d5ee9829f98", size = 13367805 },
+    { url = "https://files.pythonhosted.org/packages/be/70/a78cd7cdac7581cf0a7e027cf3c69d07ca5b6b83d39f571411cc73f1590f/uv-0.4.29-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:246da468ac0d51e7fb257cd038db2f8d6376ae269a44d01f56776e32108aa9da", size = 15158094 },
+    { url = "https://files.pythonhosted.org/packages/e6/93/3bcb18a54a9823c8bfadd362022b1c480da10c0bcd86398101f9a124e0a7/uv-0.4.29-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:8c71663c7df4f512c697de39a4926dc191897f5fede73644bb2329f532c1ebfa", size = 13917229 },
+    { url = "https://files.pythonhosted.org/packages/8a/38/bd90e265f868ddbc2dd3cc9625e2d8670d3ac35984a078491be11be754f3/uv-0.4.29-py3-none-win32.whl", hash = "sha256:b5775db128b98251c3ea7874367fc20dce9f9aac3dbfa635e3ef4a1c56842d9c", size = 13203439 },
+    { url = "https://files.pythonhosted.org/packages/cb/4f/446a0fe5901b110093f3888e93c8ebee1b08f35ba1699bbaf3645b553865/uv-0.4.29-py3-none-win_amd64.whl", hash = "sha256:67dcfd253020e25ed1c49e5bd06406205c37264f99e14002de53a357cd1cdadf", size = 14902665 },
 ]
@@ -2912,7 +3555,7 @@ wheels = [

 [[package]]
 name = "yt-dlp"
-version = "2024.10.7"
+version = "2024.10.22"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "brotli", marker = "implementation_name == 'cpython'" },
@@ -2924,9 +3567,9 @@ dependencies = [
     { name = "urllib3" },
     { name = "websockets" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/2e/b1/08679efb4c1932dc6420deda8a89f03d7440d6462b7f61d339db2732a497/yt_dlp-2024.10.7.tar.gz", hash = "sha256:0baf1ab517c9748d7e337ced91c5543c36fc16246a9ebedac32ebf20c1998ceb", size = 2877443 }
+sdist = { url = "https://files.pythonhosted.org/packages/2f/79/acfe1c2bf64ed83e1b465e6550c0f5bc2214ea447a900b102f5ca6e4186e/yt_dlp-2024.10.22.tar.gz", hash = "sha256:47b82a1fd22411b5c95ef2f0a1ae1af4e6dfd736ea99fdb2a0ea41445abc62ba", size = 2885622 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/6e/91/ecb07d66110334cdb01e94b187577af3b041897090203c9957728825d46f/yt_dlp-2024.10.7-py3-none-any.whl", hash = "sha256:9e336ae663bfd7ad3ea1c02e722747388172719efc0fc39a807dace3073aa704", size = 3149082 },
+    { url = 
"https://files.pythonhosted.org/packages/bb/68/548f9819b41d53561d4f3d39588111cf39993c066b6e5300b4ae118eb2e6/yt_dlp-2024.10.22-py3-none-any.whl", hash = "sha256:ba166602ebe22a220e4dc1ead45bf00eb469ed812b22f4fb8bb54734f9b02084", size = 3155189 }, ] [[package]] @@ -2940,35 +3583,35 @@ wheels = [ [[package]] name = "zope-interface" -version = "7.1.0" +version = "7.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "setuptools" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e4/1f/8bb0739aba9a8909bcfa2e12dc20443ebd5bd773b6796603f1a126211e18/zope_interface-7.1.0.tar.gz", hash = "sha256:3f005869a1a05e368965adb2075f97f8ee9a26c61898a9e52a9764d93774f237", size = 300239 } +sdist = { url = "https://files.pythonhosted.org/packages/3c/f5/1079cab32302359cc09bd1dca9656e680601e0e8af9397322ab0fe85f368/zope.interface-7.1.1.tar.gz", hash = "sha256:4284d664ef0ff7b709836d4de7b13d80873dc5faeffc073abdb280058bfac5e3", size = 253129 } wheels = [ - { url = "https://files.pythonhosted.org/packages/52/cf/6fe78d1748ade8bde9e0afa0b7a6dc53427fa817c44c0c67937f4a3890ca/zope.interface-7.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2bd9e9f366a5df08ebbdc159f8224904c1c5ce63893984abb76954e6fbe4381a", size = 207992 }, - { url = "https://files.pythonhosted.org/packages/98/6a/7583a3bf0ba508d7454b69928ced99f516af674be7a2781d681bbdf3e439/zope.interface-7.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:661d5df403cd3c5b8699ac480fa7f58047a3253b029db690efa0c3cf209993ef", size = 208498 }, - { url = "https://files.pythonhosted.org/packages/f2/d7/acae0a46ff4494ade2478335aeb2dec2ec024b7761915b82887cb04f207d/zope.interface-7.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91b6c30689cfd87c8f264acb2fc16ad6b3c72caba2aec1bf189314cf1a84ca33", size = 254730 }, - { url = "https://files.pythonhosted.org/packages/76/78/42201e0e6150a14d6aaf138f969186a89ec31d25a5860b7c054191cfefa6/zope.interface-7.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b6a4924f5bad9fe21d99f66a07da60d75696a136162427951ec3cb223a5570d", size = 249135 }, - { url = "https://files.pythonhosted.org/packages/3f/1e/a2bb69085db973bc936493e1a870c708b4e61496c4c1f04033a9aeb2dcce/zope.interface-7.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80a3c00b35f6170be5454b45abe2719ea65919a2f09e8a6e7b1362312a872cd3", size = 254254 }, - { url = "https://files.pythonhosted.org/packages/4f/cf/a5cb40b19f52c100d0ce22797f63ac865ced81fbf3a75a7ae0ecf2c45810/zope.interface-7.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b936d61dbe29572fd2cfe13e30b925e5383bed1aba867692670f5a2a2eb7b4e9", size = 211705 }, - { url = "https://files.pythonhosted.org/packages/9a/0b/c9dd45c073109fcaa63d5e167cae9e364fcb25f3626350127258a678ff0f/zope.interface-7.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0ac20581fc6cd7c754f6dff0ae06fedb060fa0e9ea6309d8be8b2701d9ea51c4", size = 208524 }, - { url = "https://files.pythonhosted.org/packages/e0/34/57afb328bcced4d0472c11cfab5581cc1e6bb91adf1bb87509a4f5690755/zope.interface-7.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:848b6fa92d7c8143646e64124ed46818a0049a24ecc517958c520081fd147685", size = 209032 }, - { url = "https://files.pythonhosted.org/packages/e9/a4/b2e4900f6d4a572979b5e8aa95f1ff9296b458978537f51ba546da51c108/zope.interface-7.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ec1ef1fdb6f014d5886b97e52b16d0f852364f447d2ab0f0c6027765777b6667", size = 261251 }, - { url = "https://files.pythonhosted.org/packages/c3/89/2cd0a6b24819c024b340fa67f0dda65d0ac8bbd81f35a1fa7c468b681d55/zope.interface-7.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bcff5c09d0215f42ba64b49205a278e44413d9bf9fa688fd9e42bfe472b5f4f", size = 255366 }, - { url = "https://files.pythonhosted.org/packages/9e/00/e58be3067025ffbeed48094a07c1972d8150f6d628151fde66f16fa0d4ae/zope.interface-7.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07add15de0cc7e69917f7d286b64d54125c950aeb43efed7a5ea7172f000fbc1", size = 260078 }, - { url = "https://files.pythonhosted.org/packages/d1/b6/56436f9f6b74c13c9cd3dbd8345f47823d72b7c9ba2b39872cb7bee4cf42/zope.interface-7.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:9940d5bc441f887c5f375ec62bcf7e7e495a2d5b1da97de1184a88fb567f06af", size = 212092 }, - { url = "https://files.pythonhosted.org/packages/ee/d7/0ab8291230cf4fa05fa6f7bb26e0206d799a922070bc3a102f88133edc1e/zope.interface-7.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f245d039f72e6f802902375755846f5de1ee1e14c3e8736c078565599bcab621", size = 208649 }, - { url = "https://files.pythonhosted.org/packages/4e/ce/598d623faeca8a7ccb120a7d94f707efb61d21a57324a905c9a2bdb7b4b9/zope.interface-7.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6159e767d224d8f18deff634a1d3722e68d27488c357f62ebeb5f3e2f5288b1f", size = 209053 }, - { url = "https://files.pythonhosted.org/packages/ea/d0/c88caffdf6cf99e9b5d1fad9bdfa94d9eee21f72c2f9f4768bced100aab7/zope.interface-7.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e956b1fd7f3448dd5e00f273072e73e50dfafcb35e4227e6d5af208075593c9", size = 266506 }, - { url = "https://files.pythonhosted.org/packages/1d/bd/2b665bb66b18169828f0e3d0865eabdb3c8f59556db90367950edccfc072/zope.interface-7.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff115ef91c0eeac69cd92daeba36a9d8e14daee445b504eeea2b1c0b55821984", size = 261229 }, - { url = "https://files.pythonhosted.org/packages/04/a0/9a0595057002784395990b5e5a5e84e71905f5c110ea5ecae469dc831468/zope.interface-7.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bec001798ab62c3fc5447162bf48496ae9fba02edc295a9e10a0b0c639a6452e", size = 267167 }, - { url = "https://files.pythonhosted.org/packages/fb/64/cf1a22aad65dc9746fdc6705042c066011e3fe80f9c73aea9a53b0b3642d/zope.interface-7.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:124149e2d42067b9c6597f4dafdc7a0983d0163868f897b7bb5dc850b14f9a87", size = 212207 }, - { url = "https://files.pythonhosted.org/packages/43/39/75d4e59474ec7aeb8eebb01fae88e97ee8b0b3144d7a445679f000001977/zope.interface-7.1.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:9733a9a0f94ef53d7aa64661811b20875b5bc6039034c6e42fb9732170130573", size = 208650 }, - { url = "https://files.pythonhosted.org/packages/c9/24/929b5530508a39a842fe50e159681b3dd36800604252940662268c3a8551/zope.interface-7.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5fcf379b875c610b5a41bc8a891841533f98de0520287d7f85e25386cd10d3e9", size = 209057 }, - { url = 
"https://files.pythonhosted.org/packages/fa/a3/07c120b40d47a3b28faadbacea579db8d7dc9214c909da13d72fd55395f7/zope.interface-7.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d0a45b5af9f72c805ee668d1479480ca85169312211bed6ed18c343e39307d5f", size = 266466 }, - { url = "https://files.pythonhosted.org/packages/4f/fa/e1925c8737787887a2801a45aadbc1ca8367fd9f135e721a2ce5a020e14d/zope.interface-7.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4af4a12b459a273b0b34679a5c3dc5e34c1847c3dd14a628aa0668e19e638ea2", size = 261220 }, - { url = "https://files.pythonhosted.org/packages/d5/79/d7828b915edf77f8f7849e0ab4380084d07c3d09ef86f9763f1490661d66/zope.interface-7.1.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a735f82d2e3ed47ca01a20dfc4c779b966b16352650a8036ab3955aad151ed8a", size = 267157 }, - { url = "https://files.pythonhosted.org/packages/98/ac/012f18dc9b35e8547975f6e0512bcb6a1e97901d7a5e4e4cb5899dee6304/zope.interface-7.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:5501e772aff595e3c54266bc1bfc5858e8f38974ce413a8f1044aae0f32a83a3", size = 212213 }, + { url = "https://files.pythonhosted.org/packages/33/41/328372febe88b50cb1c77d99fd3ee8e628fb125bd26b38b5351f8b9bdcbb/zope.interface-7.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6650bd56ef350d37c8baccfd3ee8a0483ed6f8666e641e4b9ae1a1827b79f9e5", size = 208001 }, + { url = "https://files.pythonhosted.org/packages/22/06/ced7336eeabba528a39803ccdf52200daa4e7b73d74feac52677f7c83a72/zope.interface-7.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84e87eba6b77a3af187bae82d8de1a7c208c2a04ec9f6bd444fd091b811ad92e", size = 208518 }, + { url = "https://files.pythonhosted.org/packages/9a/c9/3a63c758a68739080d8c343dda2fca4d214096ed97ce56b875086b309dd2/zope.interface-7.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c4e1b4c06d9abd1037c088dae1566c85f344a3e6ae4350744c3f7f7259d9c67", size = 254689 }, + { url = "https://files.pythonhosted.org/packages/9a/59/d8c59cfb16b3f086c868d0c531892c3914acbbb324005f0e5c640855a596/zope.interface-7.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7cd5e3d910ac87652a09f6e5db8e41bc3b49cf08ddd2d73d30afc644801492cd", size = 249133 }, + { url = "https://files.pythonhosted.org/packages/9a/6e/449acdd6530cbb9c224be3e59b032d8fc6db35ea8b398aaabcaee50f3881/zope.interface-7.1.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca95594d936ee349620900be5b46c0122a1ff6ce42d7d5cb2cf09dc84071ef16", size = 254250 }, + { url = "https://files.pythonhosted.org/packages/76/cb/8a13047ae686ca0a478cbf9043132acdcc8ccf71cfa0af287de235fd54f4/zope.interface-7.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:ad339509dcfbbc99bf8e147db6686249c4032f26586699ec4c82f6e5909c9fe2", size = 211708 }, + { url = "https://files.pythonhosted.org/packages/cc/9e/a53e0b252dca6f4858765efd4287239542e3018efe403ccf4f4947b1f6a8/zope.interface-7.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e59f175e868f856a77c0a77ba001385c377df2104fdbda6b9f99456a01e102a", size = 208535 }, + { url = "https://files.pythonhosted.org/packages/4a/2c/19bb3ead6133fe457e833af67cc8ce497f54bfd90f5ac532af6e4892acb2/zope.interface-7.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0de23bcb93401994ea00bc5c677ef06d420340ac0a4e9c10d80e047b9ce5af3f", size = 209053 
}, + { url = "https://files.pythonhosted.org/packages/18/3f/3b341ed342f594f3b9e3fc48acecd929d118ee1ea6e415cedfebc2b78214/zope.interface-7.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cdb7e7e5524b76d3ec037c1d81a9e2c7457b240fd4cb0a2476b65c3a5a6c81f", size = 260764 }, + { url = "https://files.pythonhosted.org/packages/65/2a/bb8f72d938cf4edf7e40cbdf14477242a3753205c4f537dafdfbb33249e5/zope.interface-7.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3603ef82a9920bd0bfb505423cb7e937498ad971ad5a6141841e8f76d2fd5446", size = 254805 }, + { url = "https://files.pythonhosted.org/packages/b1/60/abc01b59a41762cf785be8e997a7301e3cb93d19e066a35f10fb31ac0277/zope.interface-7.1.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1d52d052355e0c5c89e0630dd2ff7c0b823fd5f56286a663e92444761b35e25", size = 259573 }, + { url = "https://files.pythonhosted.org/packages/19/50/52a20a6a9e7c605eabb87dcdd5823369d3096854c41b968f2d1e18a8ae8f/zope.interface-7.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:179ad46ece518c9084cb272e4a69d266b659f7f8f48e51706746c2d8a426433e", size = 212067 }, + { url = "https://files.pythonhosted.org/packages/0f/fe/52bd130dd3f8b88868e741cf9bfeea4367e13d3f84933746f4ba01c85e6b/zope.interface-7.1.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e6503534b52bb1720ace9366ee30838a58a3413d3e197512f3338c8f34b5d89d", size = 208716 }, + { url = "https://files.pythonhosted.org/packages/8b/a9/51fe239b07f69384e77568ca3098c518926204eb1fdc7cdcc154c0c78521/zope.interface-7.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f85b290e5b8b11814efb0d004d8ce6c9a483c35c462e8d9bf84abb93e79fa770", size = 209115 }, + { url = "https://files.pythonhosted.org/packages/f0/fe/33f1f1e68d54c9563db436596a648e57c9dfc298dc0525d348cdb5e812d0/zope.interface-7.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d029fac6a80edae80f79c37e5e3abfa92968fe921886139b3ee470a1b177321a", size = 264001 }, + { url = "https://files.pythonhosted.org/packages/2e/7f/4d6dafc4debe955a72dd33f8cae1d2e522d43b42167ee8735fd0fe36961e/zope.interface-7.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5836b8fb044c6e75ba34dfaabc602493019eadfa0faf6ff25f4c4c356a71a853", size = 259018 }, + { url = "https://files.pythonhosted.org/packages/7d/3f/3180bbd9937a2889a67ad2515e56869e0cdb1f47a1f0da52dc1065c81ff8/zope.interface-7.1.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7395f13533318f150ee72adb55b29284b16e73b6d5f02ab21f173b3e83f242b8", size = 264470 }, + { url = "https://files.pythonhosted.org/packages/95/b8/46a52bfec80089d7e687c1e4471c5918e3a60c2dfff63d3e5588e4bd6656/zope.interface-7.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:1d0e23c6b746eb8ce04573cc47bcac60961ac138885d207bd6f57e27a1431ae8", size = 212226 }, + { url = "https://files.pythonhosted.org/packages/7e/78/60fb41f6fca56f90a107244e28768deac8697de8cc0f7c8469725c9949ad/zope.interface-7.1.1-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:9fad9bd5502221ab179f13ea251cb30eef7cf65023156967f86673aff54b53a0", size = 208720 }, + { url = "https://files.pythonhosted.org/packages/a5/4b/9152d924be141a1b52700ec0bb5c9a28795f67f4253dadb7f4c0c6d63675/zope.interface-7.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:55c373becbd36a44d0c9be1d5271422fdaa8562d158fb44b4192297b3c67096c", 
size = 209114 }, + { url = "https://files.pythonhosted.org/packages/00/cc/23d6d94db158b31b82e92202d3e8938d5e5cb38e3141af823a34bd8ae511/zope.interface-7.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed1df8cc01dd1e3970666a7370b8bfc7457371c58ba88c57bd5bca17ab198053", size = 263960 }, + { url = "https://files.pythonhosted.org/packages/e7/d6/acd466c950688ed8964ade5f9c5f2c035a52b44f18f19a6d79d3de48a255/zope.interface-7.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99c14f0727c978639139e6cad7a60e82b7720922678d75aacb90cf4ef74a068c", size = 259004 }, + { url = "https://files.pythonhosted.org/packages/71/31/44b746ed39134fa9c28262dc8ff9821c6b6f4df5a9edc1e599219d16cb79/zope.interface-7.1.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b1eed7670d564f1025d7cda89f99f216c30210e42e95de466135be0b4a499d9", size = 264463 }, + { url = "https://files.pythonhosted.org/packages/5a/e1/30fb5f7e587e14a57c8f41413cb76eecbcfd878ef105eb908d2d2e648b73/zope.interface-7.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:3defc925c4b22ac1272d544a49c6ba04c3eefcce3200319ee1be03d9270306dd", size = 212236 }, ]