This commit is contained in:
Nick Sweeting 2024-10-25 01:06:12 -07:00
parent 4b6f08b0fe
commit 5d9a32c364
No known key found for this signature in database
178 changed files with 2982 additions and 1322 deletions

View file

@ -1,131 +0,0 @@
__package__ = 'abx'
import importlib
from pathlib import Path
from typing import Dict, Callable, List
from . import hookspec as base_spec
from abx.hookspec import hookimpl, hookspec # noqa
from abx.manager import pm, PluginManager # noqa
pm.add_hookspecs(base_spec)
###### PLUGIN DISCOVERY AND LOADING ########################################################
def get_plugin_order(plugin_entrypoint: Path):
    """Sort key for plugin discovery: (order, path).

    The order is read from an optional ``.plugin_order`` file sitting next to
    the plugin's ``__init__.py``; plugins without one sort last (999).
    """
    order = 999
    try:
        # if .plugin_order file exists, use it to set the load priority
        order = int((plugin_entrypoint.parent / '.plugin_order').read_text())
    except (FileNotFoundError, ValueError):
        # missing file or non-integer contents -> fall back to default priority
        # (previously a malformed .plugin_order crashed plugin discovery with ValueError)
        pass
    return (order, plugin_entrypoint)
def register_hookspecs(hookspecs: List[str]):
    """Import each named module and register its hookspecs with the plugin manager.

    Args:
        hookspecs: dotted import paths of modules containing @hookspec definitions.
    """
    for import_path in hookspecs:
        spec_module = importlib.import_module(import_path)
        pm.add_hookspecs(spec_module)
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
    """
    Find all the plugins in a given directory. Just looks for an __init__.py file.
    Returns e.g. {"plugins_pkg.pip": Path("/app/archivebox/plugins_pkg/pip"), ...}
    """
    entrypoints = sorted(plugins_dir.glob("*/__init__.py"), key=get_plugin_order)
    discovered = {}
    for entrypoint in entrypoints:
        plugin_dir = entrypoint.parent
        if plugin_dir.name == 'abx':
            continue  # never treat the abx framework itself as a plugin
        discovered[f"{prefix}.{plugin_dir.name}"] = plugin_dir
    return discovered
def get_pip_installed_plugins(group='abx'):
    """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip"""
    import importlib.metadata

    detected = {}   # module_name: module_dir_path
    for dist in importlib.metadata.distributions():
        for entrypoint in dist.entry_points:
            if entrypoint.group != group or pm.is_blocked(entrypoint.name):
                continue
            detected[entrypoint.name] = Path(entrypoint.load().__file__).parent
            # pm.register(plugin, name=ep.name)
            # pm._plugin_distinfo.append((plugin, DistFacade(dist)))
    return detected
def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
    """
    Get the mapping of dir_name: {plugin_id: plugin_dir} for all plugins in the given directories.
    """
    detected = {}
    for prefix, directory in plugin_dirs.items():
        detected.update(find_plugins_in_dir(directory, prefix=prefix))
    return detected
# Load all plugins from pip packages, archivebox built-ins, and user plugins
def load_plugins(plugins_dict: Dict[str, Path]):
    """
    Import and register every plugin module, returning {module_name: module.PLUGIN}.
    """
    loaded = {}
    for module_name, module_dir in plugins_dict.items():
        # print(f'Loading plugin: {module_name} from {module_dir}')
        module = importlib.import_module(module_name)
        pm.register(module)
        loaded[module_name] = module.PLUGIN
        # print(f' √ Loaded plugin: {module_name}')
    return loaded
def get_registered_plugins():
    """
    Get all the plugins registered with Pluggy, as {name: {"name": ..., "hooks": [...], "version"?: ...}}.
    """
    plugins = {}
    plugin_to_distinfo = dict(pm.list_plugin_distinfo())
    for plugin in pm.get_plugins():
        hookcallers = pm.get_hookcallers(plugin) or ()
        info = {
            "name": plugin.__name__,
            "hooks": [caller.name for caller in hookcallers],
        }
        distinfo = plugin_to_distinfo.get(plugin)
        if distinfo:
            # pip-installed plugin: prefer the distribution's name + version
            info["version"] = distinfo.version
            info["name"] = getattr(distinfo, "name", None) or distinfo.project_name
        plugins[info["name"]] = info
    return plugins
def get_plugin_hooks(plugin_pkg: str | None) -> Dict[str, Callable]:
    """
    Get all the functions marked with @hookimpl on a module.

    Returns {attr_name: callable} for every public attribute of the module that
    pluggy recognizes as a hookimpl.
    """
    if not plugin_pkg:
        return {}

    hooks = {}

    plugin_module = importlib.import_module(plugin_pkg)
    for attr_name in dir(plugin_module):
        if attr_name.startswith('_'):
            continue
        try:
            attr = getattr(plugin_module, attr_name)
            if isinstance(attr, Callable):
                # fix: only include attrs pluggy actually recognizes as hookimpls;
                # previously the parse_hookimpl_opts() result was discarded, so every
                # public callable was returned (and a None placeholder could leak on error)
                if pm.parse_hookimpl_opts(plugin_module, attr_name) is not None:
                    hooks[attr_name] = attr
        except Exception as e:
            print(f'Error getting hookimpls for {plugin_pkg}: {e}')

    return hooks

View file

@ -1,30 +0,0 @@
__package__ = 'abx.archivebox'
import os
import importlib
from typing import Dict
from pathlib import Path
def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]):
    """Load archivebox plugins, very similar to abx.load_plugins but it looks for a pydantic PLUGIN model + hooks in apps.py"""
    LOADED_PLUGINS = {}
    for plugin_module, plugin_dir in reversed(plugins_dict.items()):
        # print(f'Loading plugin: {plugin_module} from {plugin_dir}')

        # 1. register the plugin module directly in case it contains any loose hookimpls (e.g. in __init__.py)
        try:
            plugin_module_loaded = importlib.import_module(plugin_module)
            pm.register(plugin_module_loaded)
            # fix: record the successfully loaded module (previously the returned
            # dict was always empty)
            LOADED_PLUGINS[plugin_module] = plugin_module_loaded
        except Exception as e:
            print(f'Error registering plugin: {plugin_module} - {e}')
            # fix: if the package itself failed to import, importing .apps below
            # would raise the same error uncaught and crash startup
            continue

        # 2. then try to import plugin_module.apps as well
        if os.access(plugin_dir / 'apps.py', os.R_OK):
            plugin_apps = importlib.import_module(plugin_module + '.apps')
            pm.register(plugin_apps)     # register the whole .apps in case it contains loose hookimpls (not in a class)

        # print(f' √ Loaded plugin: {plugin_module} {len(archivebox_plugins_found) * "🧩"}')
    return LOADED_PLUGINS

View file

@ -1,117 +0,0 @@
__package__ = "abx.archivebox"
import os
from typing import Optional, cast
from typing_extensions import Self
from pydantic import validate_call
from pydantic_pkgr import (
Binary,
BinProvider,
BinProviderName,
AptProvider,
BrewProvider,
EnvProvider,
)
from archivebox.config.permissions import ARCHIVEBOX_USER
import abx
class BaseBinProvider(BinProvider):
    """Common base for ArchiveBox binary providers (apt/brew/env/...), built on pydantic_pkgr.BinProvider."""
    # TODO: add install/load/load_or_install methods as abx.hookimpl methods

    @property
    def admin_url(self) -> str:
        # e.g. /admin/environment/binproviders/NpmBinProvider/   TODO
        return "/admin/environment/binaries/"

    @abx.hookimpl
    def get_BINPROVIDERS(self):
        # registers this provider instance itself as a plugin-provided BinProvider
        return [self]
class BaseBinary(Binary):
    """ArchiveBox wrapper around pydantic_pkgr.Binary that caches detected
    binaries via the InstalledBinary DB model and symlinks them into LIB_DIR/bin."""
    # TODO: formalize state diagram, final states, transitions, side effects, etc.

    @staticmethod
    def symlink_to_lib(binary, bin_dir=None) -> None:
        # Best-effort: create/refresh a world-executable symlink to the binary
        # inside bin_dir (default: LIB_DIR/bin) so it is reachable at a stable path.
        from archivebox.config.common import STORAGE_CONFIG
        bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'

        # nothing to do if the binary has no abspath or we can't read it
        if not (binary.abspath and os.access(binary.abspath, os.R_OK)):
            return

        try:
            bin_dir.mkdir(parents=True, exist_ok=True)
            symlink = bin_dir / binary.name
            symlink.unlink(missing_ok=True)
            symlink.symlink_to(binary.abspath)
            symlink.chmod(0o777)   # make sure its executable by everyone
        except Exception as err:
            # print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
            # not actually needed, we can just run without it
            pass

    @validate_call
    def load(self, fresh=False, **kwargs) -> Self:
        # fresh=True: re-detect the binary on the host and refresh the lib symlink;
        # fresh=False: prefer the cached InstalledBinary record from the DB.
        from archivebox.config.common import STORAGE_CONFIG
        if fresh:
            binary = super().load(**kwargs)
            self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
        else:
            # get cached binary from db
            try:
                from machine.models import InstalledBinary
                installed_binary = InstalledBinary.objects.get_from_db_or_cache(self) # type: ignore
                binary = InstalledBinary.load_from_db(installed_binary)
            except Exception:
                # maybe we are not in a DATA dir so there is no db, fallback to reading from fs
                # (e.g. when archivebox version is run outside of a DATA dir)
                binary = super().load(**kwargs)
        return cast(Self, binary)

    @validate_call
    def install(self, **kwargs) -> Self:
        # install via the configured BinProvider, then expose it in LIB_DIR/bin
        from archivebox.config.common import STORAGE_CONFIG
        binary = super().install(**kwargs)
        self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
        return binary

    @validate_call
    def load_or_install(self, fresh=False, **kwargs) -> Self:
        # try to load an already-installed binary first; fall back to installing it
        from archivebox.config.common import STORAGE_CONFIG
        try:
            binary = self.load(fresh=fresh)
            if binary and binary.version:
                self.symlink_to_lib(binary=binary, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
                return binary
        except Exception:
            # loading failed (no db / not detected / etc.) -> install from scratch below
            pass
        return self.install(**kwargs)

    @property
    def admin_url(self) -> str:
        # e.g. /admin/environment/config/LdapConfig/
        return f"/admin/environment/binaries/{self.name}/"

    @abx.hookimpl
    def get_BINARIES(self):
        # registers this binary instance itself as a plugin-provided Binary
        return [self]
class AptBinProvider(AptProvider, BaseBinProvider):
    # Debian/Ubuntu apt package manager provider
    name: BinProviderName = "apt"

class BrewBinProvider(BrewProvider, BaseBinProvider):
    # macOS/Linux Homebrew provider
    name: BinProviderName = "brew"

class EnvBinProvider(EnvProvider, BaseBinProvider):
    # provider for binaries already available in the current environment ($PATH)
    name: BinProviderName = "env"
    # euid set to the detected archivebox user — presumably so env binaries
    # run as that user rather than root; TODO confirm against pydantic_pkgr semantics
    euid: Optional[int] = ARCHIVEBOX_USER

# module-level singleton provider instances shared by the rest of the codebase
apt = AptBinProvider()
brew = BrewBinProvider()
env = EnvBinProvider()

View file

@ -1,204 +0,0 @@
__package__ = 'abx.archivebox'
import json
import os
from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
from pathlib import Path
from pydantic import AfterValidator
from pydantic_pkgr import BinName
from django.utils.functional import cached_property
from django.utils import timezone
import abx
from .base_binary import BaseBinary
def assert_no_empty_args(args: List[str]) -> List[str]:
    """Pydantic AfterValidator: reject arg lists containing empty strings.

    Raises ValueError (which pydantic converts to a ValidationError) instead of
    using a bare assert, so validation still works under `python -O`.
    """
    if not all(args):   # empty strings are falsy
        raise ValueError('Command args must not contain empty strings')
    return args
def _validate_identifier(s: str) -> str:
    # AfterValidator helper: extractor names must be valid python identifiers
    if not s.isidentifier():
        raise ValueError(f'{s!r} is not a valid identifier')
    return s

def _validate_self_method(s: str) -> str:
    # AfterValidator helper: handler funcs must be referenced as 'self.<name>'
    if not s.startswith('self.'):
        raise ValueError(f'{s!r} must start with "self."')
    return s

# fix: pydantic AfterValidator functions must return the validated VALUE.
# The original lambdas returned the boolean result of s.isidentifier() /
# s.startswith('self.'), which silently replaced the string with True/False.
ExtractorName = Annotated[str, AfterValidator(_validate_identifier)]
HandlerFuncStr = Annotated[str, AfterValidator(_validate_self_method)]
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)]
class BaseExtractor:
    """Base class for archiving extractors.

    Runs an external binary against a Snapshot's URL inside a temp output dir,
    then returns a dict of results plus full provenance (machine, network uplink,
    binary identity, health-stat updates).
    """

    name: ExtractorName            # identifier, validated to be a python identifier
    binary: BinName                # name of the binary this extractor shells out to

    # NOTE: mutable class-level defaults — subclasses should override, not mutate
    default_args: CmdArgsList = []
    extra_args: CmdArgsList = []

    def get_output_path(self, snapshot) -> Path:
        # relative output dir name derived from the class name, e.g. Path('wgetextractor')
        return Path(self.__class__.__name__.lower())

    def should_extract(self, uri: str, config: dict | None=None) -> bool:
        # True if this extractor's binary is installed and has a detectable version
        try:
            assert self.detect_installed_binary().version
        except Exception:
            # NOTE(review): this re-raises, making the `return False` below
            # unreachable — probably intended to be `pass` so that a missing
            # binary just skips extraction; confirm intended behavior
            raise
            # could not load binary
            return False

        # output_dir = self.get_output_path(snapshot)
        # if output_dir.glob('*.*'):
        #     return False
        return True

    @abx.hookimpl
    def extract(self, snapshot_id: str) -> Dict[str, Any]:
        """Run the extractor against the Snapshot with the given id.

        Returns {} if the extractor should not run, otherwise a JSON-safe dict
        with cmd/stdout/stderr/returncode, timing, output files, and metadata
        about the machine, uplink, and binary used.
        """
        from core.models import Snapshot
        from archivebox import CONSTANTS

        snapshot = Snapshot.objects.get(id=snapshot_id)

        if not self.should_extract(snapshot.url):
            return {}

        status = 'failed'
        start_ts = timezone.now()
        uplink = self.detect_network_interface()
        installed_binary = self.detect_installed_binary()
        machine = installed_binary.machine
        assert uplink.machine == installed_binary.machine   # it would be *very* weird if this wasn't true

        # scratch dir for this run: DATA_DIR/.tmp/extractors/<name>/<snapshot abid>
        output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid)
        output_dir.mkdir(parents=True, exist_ok=True)

        # execute the extractor binary with the given args
        # NOTE(review): `self.args` is not defined on this class — presumably set
        # by subclasses/instances; if unset this raises AttributeError before ever
        # reaching the default_args fallback — confirm
        args = [snapshot.url, *self.args] if self.args is not None else [snapshot.url, *self.default_args, *self.extra_args]
        cmd = [str(installed_binary.abspath), *args]
        proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir)

        # collect the output
        end_ts = timezone.now()
        output_files = list(str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*'))
        stdout = proc.stdout.strip()
        stderr = proc.stderr.strip()
        output_json = None
        output_text = stdout
        try:
            # if stdout parses as JSON, report it structured instead of as text
            output_json = json.loads(stdout.strip())
            output_text = None
        except json.JSONDecodeError:
            pass

        errors = []
        if proc.returncode == 0:
            status = 'success'
        else:
            errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}')

        # increment health stats counters
        if status == 'success':
            machine.record_health_success()
            uplink.record_health_success()
            installed_binary.record_health_success()
        else:
            machine.record_health_failure()
            uplink.record_health_failure()
            installed_binary.record_health_failure()

        return {
            'extractor': self.name,

            'snapshot': {
                'id': snapshot.id,
                'abid': snapshot.abid,
                'url': snapshot.url,
                'created_by_id': snapshot.created_by_id,
            },

            'machine': {
                'id': machine.id,
                'abid': machine.abid,
                'guid': machine.guid,
                'hostname': machine.hostname,
                'hw_in_docker': machine.hw_in_docker,
                'hw_in_vm': machine.hw_in_vm,
                'hw_manufacturer': machine.hw_manufacturer,
                'hw_product': machine.hw_product,
                'hw_uuid': machine.hw_uuid,
                'os_arch': machine.os_arch,
                'os_family': machine.os_family,
                'os_platform': machine.os_platform,
                'os_release': machine.os_release,
                'os_kernel': machine.os_kernel,
            },

            'uplink': {
                'id': uplink.id,
                'abid': uplink.abid,
                'mac_address': uplink.mac_address,
                'ip_public': uplink.ip_public,
                'ip_local': uplink.ip_local,
                'dns_server': uplink.dns_server,
                'hostname': uplink.hostname,
                'iface': uplink.iface,
                'isp': uplink.isp,
                'city': uplink.city,
                'region': uplink.region,
                'country': uplink.country,
            },

            'binary': {
                'id': installed_binary.id,
                'abid': installed_binary.abid,
                'name': installed_binary.name,
                'binprovider': installed_binary.binprovider,
                'abspath': installed_binary.abspath,
                'version': installed_binary.version,
                'sha256': installed_binary.sha256,
            },

            'cmd': cmd,
            'stdout': stdout,
            'stderr': stderr,
            'returncode': proc.returncode,
            'start_ts': start_ts,
            'end_ts': end_ts,
            'status': status,
            'errors': errors,
            'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
            'output_files': output_files,
            'output_json': output_json or {},
            'output_text': output_text or '',
        }

    # TODO: move this to a hookimpl
    def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
        # run the loaded binary with the given args in cwd (defaults to the current dir)
        cwd = cwd or Path(os.getcwd())
        binary = self.load_binary(installed_binary=installed_binary)
        return binary.exec(cmd=args, cwd=cwd)

    @cached_property
    def BINARY(self) -> BaseBinary:
        # look up this extractor's BaseBinary among all plugin-provided binaries by name
        import abx.archivebox.reads
        for binary in abx.archivebox.reads.get_BINARIES().values():
            if binary.name == self.binary:
                return binary
        raise ValueError(f'Binary {self.binary} not found')

    def detect_installed_binary(self):
        from machine.models import InstalledBinary
        # hydrates binary from DB/cache if record of installed version is recent enough
        # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
        return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)

    def load_binary(self, installed_binary=None) -> BaseBinary:
        # resolve an InstalledBinary record into a usable BaseBinary instance
        installed_binary = installed_binary or self.detect_installed_binary()
        return installed_binary.load_from_db()

    def detect_network_interface(self):
        # the NetworkInterface row describing the current machine's uplink
        from machine.models import NetworkInterface
        return NetworkInterface.objects.current()

    @abx.hookimpl
    def get_EXTRACTORS(self):
        # registers this extractor instance itself as plugin-provided
        return [self]

View file

@ -1,25 +0,0 @@
__package__ = 'abx.archivebox'
import abx
class BaseReplayer:
    """Describes how to render an ArchiveResult in several contexts"""

    # pattern of URLs this replayer applies to ('*' = everything) —
    # presumably glob-style matching; TODO confirm where this is consumed
    url_pattern: str = '*'

    # template used for each render context
    row_template: str = 'plugins/generic_replayer/templates/row.html'
    embed_template: str = 'plugins/generic_replayer/templates/embed.html'
    fullpage_template: str = 'plugins/generic_replayer/templates/fullpage.html'

    # row_view: LazyImportStr = 'plugins.generic_replayer.views.row_view'
    # embed_view: LazyImportStr = 'plugins.generic_replayer.views.embed_view'
    # fullpage_view: LazyImportStr = 'plugins.generic_replayer.views.fullpage_view'
    # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
    # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'

    @abx.hookimpl
    def get_REPLAYERS(self):
        # registers this replayer instance itself as plugin-provided
        return [self]

    # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc...

View file

@ -1,52 +0,0 @@
__package__ = 'abx.archivebox'
from typing import Dict, Any
from .. import hookspec
from .base_binary import BaseBinary, BaseBinProvider
from .base_configset import BaseConfigSet
from .base_extractor import BaseExtractor
from .base_searchbackend import BaseSearchBackend
@hookspec
def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
    """Hookspec: plugins return {plugin_id: metadata_dict} describing themselves."""
    return {}

@hookspec
def get_CONFIG() -> Dict[str, BaseConfigSet]:
    """Hookspec: plugins return {config_id: BaseConfigSet} they provide."""
    return {}

@hookspec
def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
    """Hookspec: plugins return {extractor_id: BaseExtractor} they provide."""
    return {}

@hookspec
def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
    """Hookspec: plugins return {backend_id: BaseSearchBackend} they provide."""
    return {}
# @hookspec
# def get_REPLAYERS() -> Dict[str, BaseReplayer]:
# return {}
# @hookspec
# def get_ADMINDATAVIEWS():
# return {}
# @hookspec
# def get_QUEUES():
# return {}
##############################################################
# provided by abx.pydantic_pkgr.hookspec:
# @hookspec
# def get_BINARIES() -> Dict[str, BaseBinary]:
# return {}
# @hookspec
# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
# return {}

View file

@ -1,160 +0,0 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, Set, Any, TYPE_CHECKING
from benedict import benedict
import abx
from .. import pm
if TYPE_CHECKING:
from .base_configset import BaseConfigSet
from .base_binary import BaseBinary, BaseBinProvider
from .base_extractor import BaseExtractor
from .base_searchbackend import BaseSearchBackend
# from .base_replayer import BaseReplayer
# from .base_queue import BaseQueue
# from .base_admindataview import BaseAdminDataView
# API exposed to ArchiveBox code
def get_PLUGINS() -> Dict[str, Dict[str, Any]]:
    """Merge every plugin's get_PLUGIN() hook results into one {plugin_id: info} mapping."""
    all_plugins = {}
    for plugin_dict in pm.hook.get_PLUGIN():
        all_plugins.update(plugin_dict)
    return benedict(all_plugins)
def get_PLUGIN(plugin_id: str) -> Dict[str, Any]:
    """Resolve full metadata (module, hooks, version, author, ...) for one plugin id.

    Returns a minimal stub {'id': ..., 'hooks': {}} when the plugin did not
    declare a package to import.
    """
    plugin_info = get_PLUGINS().get(plugin_id, {})
    package = plugin_info.get('package', plugin_info.get('PACKAGE', None))
    if not package:
        return {'id': plugin_id, 'hooks': {}}
    module = importlib.import_module(package)
    hooks = abx.get_plugin_hooks(module.__package__)
    # sanity check: the plugin must at least declare an id or implement hooks
    assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks)

    # defaults below are overridden by any explicit keys in plugin_info (**plugin_info last)
    return benedict({
        'id': plugin_id,
        'label': getattr(module, '__label__', plugin_id),
        'module': module,
        'package': module.__package__,
        'hooks': hooks,
        'version': getattr(module, '__version__', '999.999.999'),
        'author': getattr(module, '__author__', 'Unknown'),
        'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'),
        'dependencies': getattr(module, '__dependencies__', []),
        'source_code': module.__file__,
        **plugin_info,
    })
def get_HOOKS() -> Set[str]:
    """Names of every hook implemented by any loaded plugin."""
    hook_names = set()
    for plugin_id in get_PLUGINS().keys():
        hook_names.update(get_PLUGIN(plugin_id).hooks)
    return hook_names
def get_CONFIGS() -> benedict:  # Dict[str, 'BaseConfigSet']
    """Collect every plugin's configsets into one {config_id: configset} mapping."""
    merged = {}
    for plugin_configs in pm.hook.get_CONFIG():
        merged.update(plugin_configs)
    return benedict(merged)
def get_FLAT_CONFIG() -> Dict[str, Any]:
    """Flatten all configsets into a single {KEY: value} mapping (later sets win)."""
    flat = {}
    for configset in get_CONFIGS().values():
        flat.update(configset.model_dump())
    return benedict(flat)
def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']:
    """Builtin env/apt/brew providers plus every plugin-provided BinProvider."""
    # TODO: move these to plugins
    from abx.archivebox.base_binary import apt, brew, env

    merged = {
        'env': env,
        'apt': apt,
        'brew': brew,
    }
    for plugin_binproviders in pm.hook.get_BINPROVIDERS():
        merged.update(plugin_binproviders)
    return benedict(merged)
def get_BINARIES() -> Dict[str, 'BaseBinary']:
    """Every plugin-provided Binary, keyed by binary id."""
    merged = {}
    for plugin_binaries in pm.hook.get_BINARIES():
        merged.update(plugin_binaries)
    return benedict(merged)
def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']:
    """Every plugin-provided Extractor, keyed by extractor id."""
    merged = {}
    for plugin_extractors in pm.hook.get_EXTRACTORS():
        merged.update(plugin_extractors)
    return benedict(merged)
# def get_REPLAYERS() -> Dict[str, 'BaseReplayer']:
# return benedict({
# replayer.id: replayer
# for plugin_replayers in pm.hook.get_REPLAYERS()
# for replayer in plugin_replayers
# })
# def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']:
# return benedict({
# admin_dataview.id: admin_dataview
# for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS()
# for admin_dataview in plugin_admin_dataviews
# })
# def get_QUEUES() -> Dict[str, 'BaseQueue']:
# return benedict({
# queue.id: queue
# for plugin_queues in pm.hook.get_QUEUES()
# for queue in plugin_queues
# })
def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
    """Every plugin-provided SearchBackend, keyed by backend id."""
    merged = {}
    for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS():
        merged.update(plugin_searchbackends)
    return benedict(merged)
def get_scope_config(defaults: 'benedict | None' = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
    """Get all the relevant config for the given scope, in correct precedence order.

    Precedence (lowest -> highest): defaults, persona, seed, crawl, snapshot,
    archiveresult, extra_config.
    """
    if defaults:
        default_config = defaults
    else:
        # fix: only import django settings when no explicit defaults were given,
        # so this helper remains usable without a configured django environment
        from django.conf import settings
        default_config = settings.CONFIG

    # walk up the object chain to fill in any scopes that weren't passed explicitly
    snapshot = snapshot or (archiveresult and archiveresult.snapshot)
    crawl = crawl or (snapshot and snapshot.crawl)
    seed = seed or (crawl and crawl.seed)
    persona = persona or (crawl and crawl.persona)

    persona_config = persona.config if persona else {}
    seed_config = seed.config if seed else {}
    crawl_config = crawl.config if crawl else {}
    snapshot_config = snapshot.config if snapshot else {}
    archiveresult_config = archiveresult.config if archiveresult else {}
    extra_config = extra_config or {}

    return {
        **default_config,            # defaults / config file / environment variables
        **persona_config,            # lowest precedence
        **seed_config,
        **crawl_config,
        **snapshot_config,
        **archiveresult_config,
        **extra_config,              # highest precedence
    }

View file

@ -1 +0,0 @@
__package__ = 'abx.django'

View file

@ -1,101 +0,0 @@
__package__ = 'abx.django'
import itertools
# from benedict import benedict
from .. import pm
def get_INSTALLED_APPS():
    # all plugin-provided django apps, flattened into one iterable
    # (reversed: later-registered plugins come first — presumably so higher-priority
    #  plugins win django app-loading order; TODO confirm intended precedence)
    return itertools.chain(*reversed(pm.hook.get_INSTALLED_APPS()))

# def register_INSTALLLED_APPS(INSTALLED_APPS):
#     pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS)

def get_MIDDLEWARES():
    # NOTE(review): function name is plural but it calls the singular hook
    # pm.hook.get_MIDDLEWARE() — confirm the hookspec is actually named
    # get_MIDDLEWARE, otherwise this raises AttributeError at call time
    return itertools.chain(*reversed(pm.hook.get_MIDDLEWARE()))

# def register_MIDDLEWARES(MIDDLEWARE):
#     pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE)

def get_AUTHENTICATION_BACKENDS():
    # all plugin-provided django auth backends, flattened
    return itertools.chain(*reversed(pm.hook.get_AUTHENTICATION_BACKENDS()))

# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
#     pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)

def get_STATICFILES_DIRS():
    # all plugin-provided staticfile dirs, flattened
    return itertools.chain(*reversed(pm.hook.get_STATICFILES_DIRS()))

# def register_STATICFILES_DIRS(STATICFILES_DIRS):
#     pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS)

def get_TEMPLATE_DIRS():
    # all plugin-provided django template dirs, flattened
    return itertools.chain(*reversed(pm.hook.get_TEMPLATE_DIRS()))

# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
#     pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS)

def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'):
    # merge every plugin's huey queue definitions into one dict
    # (later plugins override earlier ones on queue-name collisions)
    HUEY_QUEUES = {}
    for plugin_result in pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME):
        HUEY_QUEUES.update(plugin_result)
    return HUEY_QUEUES

# def register_DJANGO_HUEY(DJANGO_HUEY):
#     pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY)

def get_ADMIN_DATA_VIEWS_URLS():
    # all plugin-provided admin_data_views URL configs, flattened
    return itertools.chain(*reversed(pm.hook.get_ADMIN_DATA_VIEWS_URLS()))

# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
#     pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
# def register_settings(settings):
# # convert settings dict to an benedict so we can set values using settings.attr = xyz notation
# settings_as_obj = benedict(settings, keypath_separator=None)
# # set default values for settings that are used by plugins
# # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})
# # # call all the hook functions to mutate the settings values in-place
# # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE)
# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)
# # calls Plugin.settings(settings) on each registered plugin
# pm.hook.register_settings(settings=settings_as_obj)
# # then finally update the settings globals() object will all the new settings
# # settings.update(settings_as_obj)
def get_urlpatterns():
    # all plugin-provided django urlpatterns, flattened into one list
    return list(itertools.chain(*pm.hook.urlpatterns()))

def register_urlpatterns(urlpatterns):
    # let plugins mutate/extend the project's urlpatterns list in-place
    pm.hook.register_urlpatterns(urlpatterns=urlpatterns)

def register_checks():
    """register any django system checks"""
    pm.hook.register_checks()

def register_admin(admin_site):
    """register any django admin models/views with the main django admin site instance"""
    pm.hook.register_admin(admin_site=admin_site)

View file

@ -1,22 +0,0 @@
from pathlib import Path
from pluggy import HookimplMarker
from pluggy import HookspecMarker
# marker decorators for the "abx" pluggy hook namespace (aliased under both names)
spec = hookspec = HookspecMarker("abx")
impl = hookimpl = HookimplMarker("abx")

@hookspec
@hookimpl
def get_system_user() -> str:
    """Return the current user's name, derived from the home directory's basename.

    Decorated as both the spec and a default implementation, so plugins may override it.
    """
    # Beware $HOME may not match current EUID, UID, PUID, SUID, there are edge cases
    # - sudo (EUD != UID != SUID)
    # - running with an autodetected UID based on data dir ownership
    #   but mapping of UID:username is broken because it was created
    #   by a different host system, e.g. 911's $HOME outside of docker
    #   might be /usr/lib/lxd instead of /home/archivebox
    # - running as a user that doesn't have a home directory
    # - home directory is set to a path that doesn't exist, or is inside a dir we can't read
    return Path('~').expanduser().name

View file

@ -1,30 +0,0 @@
import inspect
import pluggy
class PluginManager(pluggy.PluginManager):
    """
    Patch to fix pluggy's PluginManager to work with pydantic models.
    See: https://github.com/pytest-dev/pluggy/pull/536
    """
    def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None:
        # Returns hookimpl options for plugin.<name>, or None if it is not a hookimpl.

        # IMPORTANT: @property methods can have side effects, and are never hookimpl
        # if attr is a property, skip it in advance
        plugin_class = plugin if inspect.isclass(plugin) else type(plugin)
        if isinstance(getattr(plugin_class, name, None), property):
            return None

        # pydantic model fields are like attrs and also can never be hookimpls
        plugin_is_pydantic_obj = hasattr(plugin, "__pydantic_core_schema__")
        if plugin_is_pydantic_obj and name in getattr(plugin, "model_fields", {}):
            # pydantic models mess with the class and attr __signature__
            # so inspect.isroutine(...) throws exceptions and cant be used
            return None

        try:
            return super().parse_hookimpl_opts(plugin, name)
        except AttributeError:
            # fall back to inspecting the class when instance attr access fails
            return super().parse_hookimpl_opts(type(plugin), name)

# the shared singleton plugin manager for the "abx" hook namespace
pm = PluginManager("abx")

View file

@ -1 +0,0 @@
__package__ = 'abx.pydantic_pkgr'

View file

@ -1,13 +0,0 @@
from ..hookspec import hookspec
###########################################################################################
@hookspec
def get_BINPROVIDERS():
    """Hookspec: plugins return the BinProviders they contribute (mapping of id -> provider)."""
    return {}

@hookspec
def get_BINARIES():
    """Hookspec: plugins return the Binaries they contribute (mapping of id -> binary)."""
    return {}

View file

@ -9,9 +9,6 @@ from pathlib import Path
from django.utils.crypto import get_random_string
import abx
import abx.archivebox
import abx.archivebox.reads
import abx.django.use
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa
@ -26,43 +23,22 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v
################################################################################
PLUGIN_HOOKSPECS = [
'abx.django.hookspec',
'abx.pydantic_pkgr.hookspec',
'abx.archivebox.hookspec',
'abx_spec_django',
'abx_spec_pydantic_pkgr',
'abx_spec_config',
'abx_spec_archivebox',
]
abx.register_hookspecs(PLUGIN_HOOKSPECS)
BUILTIN_PLUGIN_DIRS = {
'archivebox': PACKAGE_DIR,
'plugins_pkg': PACKAGE_DIR / 'plugins_pkg',
'plugins_auth': PACKAGE_DIR / 'plugins_auth',
'plugins_search': PACKAGE_DIR / 'plugins_search',
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
}
USER_PLUGIN_DIRS = {
# 'user_plugins': DATA_DIR / 'user_plugins',
}
SYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
USER_PLUGINS = abx.find_plugins_in_dir(DATA_DIR / 'user_plugins')
# Discover ArchiveBox plugins
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox')
USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
ALL_PLUGINS = {**SYSTEM_PLUGINS, **USER_PLUGINS}
# Load ArchiveBox plugins
PLUGIN_MANAGER = abx.pm
abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
PLUGINS = abx.archivebox.reads.get_PLUGINS()
abx.load_plugins(ALL_PLUGINS)
# Load ArchiveBox config from plugins
CONFIGS = abx.archivebox.reads.get_CONFIGS()
CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
BINARIES = abx.archivebox.reads.get_BINARIES()
EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
# REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
# # Load ArchiveBox config from plugins
################################################################################
@ -110,7 +86,7 @@ INSTALLED_APPS = [
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
# ArchiveBox plugins
*abx.django.use.get_INSTALLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
*abx.as_list(abx.pm.hook.get_INSTALLED_APPS()), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
# 3rd-party apps from PyPI that need to be loaded last
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
@ -135,7 +111,7 @@ MIDDLEWARE = [
'core.middleware.ReverseProxyAuthMiddleware',
'django.contrib.messages.middleware.MessageMiddleware',
'core.middleware.CacheControlMiddleware',
*abx.django.use.get_MIDDLEWARES(),
*abx.as_list(abx.pm.hook.get_MIDDLEWARES()),
]
@ -148,7 +124,7 @@ MIDDLEWARE = [
AUTHENTICATION_BACKENDS = [
'django.contrib.auth.backends.RemoteUserBackend',
'django.contrib.auth.backends.ModelBackend',
*abx.django.use.get_AUTHENTICATION_BACKENDS(),
*abx.as_list(abx.pm.hook.get_AUTHENTICATION_BACKENDS()),
]
@ -169,7 +145,7 @@ AUTHENTICATION_BACKENDS = [
STATIC_URL = '/static/'
TEMPLATES_DIR_NAME = 'templates'
CUSTOM_TEMPLATES_ENABLED = os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK) and CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir()
CUSTOM_TEMPLATES_ENABLED = os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK)
STATICFILES_DIRS = [
*([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_ENABLED else []),
# *[
@ -177,7 +153,7 @@ STATICFILES_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'static').is_dir()
# ],
*abx.django.use.get_STATICFILES_DIRS(),
*abx.as_list(abx.pm.hook.get_STATICFILES_DIRS()),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
]
@ -188,7 +164,7 @@ TEMPLATE_DIRS = [
# for plugin_dir in PLUGIN_DIRS.values()
# if (plugin_dir / 'templates').is_dir()
# ],
*abx.django.use.get_TEMPLATE_DIRS(),
*abx.as_list(abx.pm.hook.get_TEMPLATE_DIRS()),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
@ -292,7 +268,7 @@ if not IS_GETTING_VERSION_OR_HELP: # dont create queue.sqlite3 file
"queues": {
HUEY["name"]: HUEY.copy(),
# more registered here at plugin import-time by BaseQueue.register()
**abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME),
**abx.as_dict(abx.pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME)),
},
}
@ -517,7 +493,7 @@ ADMIN_DATA_VIEWS = {
"name": "log",
},
},
*abx.django.use.get_ADMIN_DATA_VIEWS_URLS(),
*abx.as_list(abx.pm.hook.get_ADMIN_DATA_VIEWS_URLS()),
],
}
@ -611,7 +587,4 @@ if DEBUG_REQUESTS_TRACKER:
# JET_TOKEN = 'some-api-token-here'
abx.django.use.register_checks()
# abx.archivebox.reads.register_all_hooks(globals())
# import ipdb; ipdb.set_trace()

View file

@ -1,42 +0,0 @@
__package__ = 'plugins_pkg.npm'
from pathlib import Path
from typing import Optional
from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
from archivebox.config import DATA_DIR, CONSTANTS
from abx.archivebox.base_binary import BaseBinProvider
OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
NEW_NODE_BIN_PATH = CONSTANTS.DEFAULT_LIB_DIR / 'npm' / 'node_modules' / '.bin'
class SystemNpmBinProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "sys_npm"
npm_prefix: Optional[Path] = None
class LibNpmBinProvider(NpmProvider, BaseBinProvider):
name: BinProviderName = "lib_npm"
PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
npm_prefix: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'npm'
def setup(self) -> None:
# update paths from config if they arent the default
from archivebox.config.common import STORAGE_CONFIG
if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm'
self.PATH = f'{STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
super().setup()
SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
LIB_NPM_BINPROVIDER = LibNpmBinProvider()
npm = LIB_NPM_BINPROVIDER

View file

@ -8,8 +8,8 @@ VENDORED_LIBS = {
# sys.path dir: library name
#'python-atomicwrites': 'atomicwrites',
#'django-taggit': 'taggit',
'pydantic-pkgr': 'pydantic_pkgr',
'pocket': 'pocket',
# 'pydantic-pkgr': 'pydantic_pkgr',
# 'pocket': 'pocket',
#'base32-crockford': 'base32_crockford',
}

@ -1 +0,0 @@
Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5

@ -1 +0,0 @@
Subproject commit a774f24644ee14f14fa2cc3d8e6e0a585ae00fdd

32
click_test.py Normal file
View file

@ -0,0 +1,32 @@
import sys
import click
from rich import print
from archivebox.config.django import setup_django
setup_django()
import abx.archivebox.writes
def parse_stdin_to_args(io=sys.stdin):
for line in io.read().split('\n'):
for url_or_id in line.split(' '):
if url_or_id.strip():
yield url_or_id.strip()
# Gather data from stdin in case using a pipe
if not sys.stdin.isatty():
sys.argv += parse_stdin_to_args(sys.stdin)
@click.command()
@click.argument("snapshot_ids_or_urls", type=str, nargs=-1)
def extract(snapshot_ids_or_urls):
for url_or_snapshot_id in snapshot_ids_or_urls:
print('- EXTRACTING', url_or_snapshot_id, file=sys.stderr)
for result in abx.archivebox.writes.extract(url_or_snapshot_id):
print(result)
if __name__ == "__main__":
extract()

View file

@ -0,0 +1,7 @@
[project]
name = "abx-archivedotorg-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -13,15 +13,15 @@ from pydantic_pkgr import (
bin_abspath,
)
import abx.archivebox.reads
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
# Depends on Other Plugins:
from archivebox.config.common import SHELL_CONFIG
from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER
from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER
from abx_puppeteer_binprovider.binproviders import PUPPETEER_BINPROVIDER
from abx_playwright_binprovider.binproviders import PLAYWRIGHT_BINPROVIDER
from .config import CHROME_CONFIG
CHROMIUM_BINARY_NAMES_LINUX = [
"chromium",
"chromium-browser",
@ -48,12 +48,13 @@ CHROME_BINARY_NAMES_MACOS = [
]
CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
APT_DEPENDENCIES = [
'apt-transport-https', 'at-spi2-common', 'chromium-browser',
CHROME_APT_DEPENDENCIES = [
'apt-transport-https', 'at-spi2-common',
'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei',
'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2',
'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1',
'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings',
'chromium-browser',
]
@ -95,7 +96,7 @@ class ChromeBinary(BaseBinary):
'packages': ['chromium'], # playwright install chromium
},
apt.name: {
'packages': APT_DEPENDENCIES,
'packages': CHROME_APT_DEPENDENCIES,
},
brew.name: {
'packages': ['--cask', 'chromium'] if platform.system().lower() == 'darwin' else [],
@ -104,10 +105,9 @@ class ChromeBinary(BaseBinary):
@staticmethod
def symlink_to_lib(binary, bin_dir=None) -> None:
from archivebox.config.common import STORAGE_CONFIG
bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
bin_dir = bin_dir or abx.archivebox.reads.get_CONFIGS().STORAGE_CONFIG.LIB_DIR / 'bin'
if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
if not (binary.abspath and os.path.isfile(binary.abspath)):
return
bin_dir.mkdir(parents=True, exist_ok=True)
@ -121,7 +121,7 @@ class ChromeBinary(BaseBinary):
# otherwise on linux we can symlink directly to binary executable
symlink.unlink(missing_ok=True)
symlink.symlink_to(binary.abspath)
except Exception as err:
except Exception:
# print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
# not actually needed, we can just run without it
pass
@ -132,14 +132,17 @@ class ChromeBinary(BaseBinary):
Cleans up any state or runtime files that chrome leaves behind when killed by
a timeout or other error
"""
lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK):
lock_file.unlink()
try:
linux_lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
linux_lock_file.unlink(missing_ok=True)
except Exception:
pass
if CHROME_CONFIG.CHROME_USER_DATA_DIR:
if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK):
lock_file.unlink()
try:
(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock').unlink(missing_ok=True)
except Exception:
pass

View file

@ -0,0 +1,7 @@
[project]
name = "abx-chrome-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-curl-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,24 @@
import abx
from typing import Dict
from pydantic_pkgr import (
AptProvider,
BrewProvider,
EnvProvider,
BinProvider,
)
apt = APT_BINPROVIDER = AptProvider()
brew = BREW_BINPROVIDER = BrewProvider()
env = ENV_BINPROVIDER = EnvProvider()
@abx.hookimpl(tryfirst=True)
def get_BINPROVIDERS() -> Dict[str, BinProvider]:
return {
'apt': APT_BINPROVIDER,
'brew': BREW_BINPROVIDER,
'env': ENV_BINPROVIDER,
}

View file

@ -0,0 +1,18 @@
[project]
name = "abx-plugin-default-binproviders"
version = "2024.10.24"
description = "Default BinProviders for ABX (apt, brew, env)"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_plugin_default_binproviders = "abx_plugin_default_binproviders"

View file

@ -0,0 +1,7 @@
[project]
name = "abx-favicon-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-git-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-htmltotext-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

View file

@ -0,0 +1,22 @@
[project]
name = "abx-ldap-auth"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []
[project.entry-points.abx]
ldap = "abx_ldap_auth"
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.sdist]
packages = ["."]
[tool.hatch.build.targets.wheel]
packages = ["."]

View file

@ -0,0 +1,7 @@
[project]
name = "abx-mercury-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -1,26 +1,12 @@
__package__ = 'plugins_pkg.npm'
__version__ = '2024.10.14'
__package__ = 'abx_plugin_npm_binprovider'
__id__ = 'npm'
__label__ = 'npm'
__label__ = 'NPM'
__author__ = 'ArchiveBox'
__homepage__ = 'https://www.npmjs.com/'
import abx
@abx.hookimpl
def get_PLUGIN():
return {
__id__: {
'id': __id__,
'package': __package__,
'label': __label__,
'version': __version__,
'author': __author__,
'homepage': __homepage__,
}
}
@abx.hookimpl
def get_CONFIG():
from .config import NPM_CONFIG

View file

@ -4,14 +4,19 @@ __package__ = 'plugins_pkg.npm'
from typing import List
from pydantic import InstanceOf
from benedict import benedict
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides
from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides
from abx_plugin_default_binproviders import get_BINPROVIDERS
DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS())
env = DEFAULT_BINPROVIDERS.env
apt = DEFAULT_BINPROVIDERS.apt
brew = DEFAULT_BINPROVIDERS.brew
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
class NodeBinary(BaseBinary):
class NodeBinary(Binary):
name: BinName = 'node'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
@ -23,7 +28,7 @@ class NodeBinary(BaseBinary):
NODE_BINARY = NodeBinary()
class NpmBinary(BaseBinary):
class NpmBinary(Binary):
name: BinName = 'npm'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
@ -35,7 +40,7 @@ class NpmBinary(BaseBinary):
NPM_BINARY = NpmBinary()
class NpxBinary(BaseBinary):
class NpxBinary(Binary):
name: BinName = 'npx'
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]

View file

@ -0,0 +1,39 @@
import os
from pathlib import Path
from typing import Optional
from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
import abx
DEFAULT_LIB_NPM_DIR = Path('/usr/local/share/abx/npm')
OLD_NODE_BIN_PATH = Path(os.getcwd()) / 'node_modules' / '.bin'
NEW_NODE_BIN_PATH = DEFAULT_LIB_NPM_DIR / 'node_modules' / '.bin'
class SystemNpmBinProvider(NpmProvider):
name: BinProviderName = "sys_npm"
npm_prefix: Optional[Path] = None
class LibNpmBinProvider(NpmProvider):
name: BinProviderName = "lib_npm"
PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
npm_prefix: Optional[Path] = DEFAULT_LIB_NPM_DIR
def setup(self) -> None:
# update paths from config at runtime
LIB_DIR = abx.pm.hook.get_CONFIG().LIB_DIR
self.npm_prefix = LIB_DIR / 'npm'
self.PATH = f'{LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'
super().setup()
SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
LIB_NPM_BINPROVIDER = LibNpmBinProvider()
npm = LIB_NPM_BINPROVIDER

View file

@ -1,7 +1,4 @@
__package__ = 'plugins_pkg.npm'
from abx.archivebox.base_configset import BaseConfigSet
from abx_spec_config import BaseConfigSet
###################### Config ##########################

View file

@ -0,0 +1,20 @@
[project]
name = "abx-plugin-npm-binprovider"
version = "2024.10.24"
description = "NPM binary provider plugin for ABX"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_plugin_npm_binprovider = "abx_plugin_npm_binprovider"

View file

@ -1,33 +1,19 @@
__package__ = 'plugins_pkg.pip'
__label__ = 'pip'
__version__ = '2024.10.14'
__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/pypa/pip'
__package__ = 'abx_plugin_pip_binprovider'
__id__ = 'pip'
__label__ = 'PIP'
import abx
@abx.hookimpl
def get_PLUGIN():
return {
'pip': {
'PACKAGE': __package__,
'LABEL': __label__,
'VERSION': __version__,
'AUTHOR': __author__,
'HOMEPAGE': __homepage__,
}
}
@abx.hookimpl
def get_CONFIG():
from .config import PIP_CONFIG
return {
'pip': PIP_CONFIG
__id__: PIP_CONFIG
}
@abx.hookimpl
@abx.hookimpl(tryfirst=True)
def get_BINARIES():
from .binaries import ARCHIVEBOX_BINARY, PYTHON_BINARY, DJANGO_BINARY, SQLITE_BINARY, PIP_BINARY, PIPX_BINARY

View file

@ -1,4 +1,4 @@
__package__ = 'plugins_pkg.pip'
__package__ = 'abx_plugin_pip_binprovider'
import sys
from pathlib import Path
@ -9,29 +9,30 @@ from pydantic import InstanceOf, Field, model_validator
import django
import django.db.backends.sqlite3.base
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, SemVer
from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer
from archivebox import VERSION
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
from archivebox.misc.logging import hint
from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew
###################### Config ##########################
def get_archivebox_version():
try:
from archivebox import VERSION
return VERSION
except Exception:
return None
class ArchiveboxBinary(BaseBinary):
class ArchiveboxBinary(Binary):
name: BinName = 'archivebox'
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION},
SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION},
apt.name: {'packages': [], 'version': VERSION},
brew.name: {'packages': [], 'version': VERSION},
VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version},
SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version},
apt.name: {'packages': [], 'version': get_archivebox_version},
brew.name: {'packages': [], 'version': get_archivebox_version},
}
# @validate_call
@ -45,7 +46,7 @@ class ArchiveboxBinary(BaseBinary):
ARCHIVEBOX_BINARY = ArchiveboxBinary()
class PythonBinary(BaseBinary):
class PythonBinary(Binary):
name: BinName = 'python'
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
@ -71,9 +72,9 @@ LOADED_SQLITE_PATH = Path(django.db.backends.sqlite3.base.__file__)
LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version)
LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))
class SqliteBinary(BaseBinary):
class SqliteBinary(Binary):
name: BinName = 'sqlite'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {
"abspath": LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None,
@ -93,10 +94,10 @@ class SqliteBinary(BaseBinary):
cursor.execute('SELECT JSON(\'{"a": "b"}\')')
except django_sqlite3.OperationalError as exc:
print(f'[red][X] Your SQLite3 version is missing the required JSON1 extension: {exc}[/red]')
hint([
'Upgrade your Python version or install the extension manually:',
'https://code.djangoproject.com/wiki/JSON1Extension'
])
print(
'[violet]Hint:[/violet] Upgrade your Python version or install the extension manually:\n' +
' https://code.djangoproject.com/wiki/JSON1Extension\n'
)
return self
# @validate_call
@ -114,10 +115,10 @@ LOADED_DJANGO_PATH = Path(django.__file__)
LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3])
LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv and VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))
class DjangoBinary(BaseBinary):
class DjangoBinary(Binary):
name: BinName = 'django'
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
overrides: BinaryOverrides = {
VENV_PIP_BINPROVIDER.name: {
"abspath": LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None,
@ -139,7 +140,7 @@ class DjangoBinary(BaseBinary):
DJANGO_BINARY = DjangoBinary()
class PipBinary(BaseBinary):
class PipBinary(Binary):
name: BinName = "pip"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
@ -154,7 +155,7 @@ class PipBinary(BaseBinary):
PIP_BINARY = PipBinary()
class PipxBinary(BaseBinary):
class PipxBinary(Binary):
name: BinName = "pipx"
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]

View file

@ -1,21 +1,26 @@
__package__ = 'plugins_pkg.pip'
import os
import sys
import site
from pathlib import Path
from typing import Optional
from benedict import benedict
from pydantic_pkgr import PipProvider, BinName, BinProviderName
from archivebox.config import CONSTANTS
import abx
from abx.archivebox.base_binary import BaseBinProvider
from abx_plugin_default_binproviders import get_BINPROVIDERS
DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS())
env = DEFAULT_BINPROVIDERS.env
apt = DEFAULT_BINPROVIDERS.apt
brew = DEFAULT_BINPROVIDERS.brew
###################### Config ##########################
class SystemPipBinProvider(PipProvider, BaseBinProvider):
class SystemPipBinProvider(PipProvider):
name: BinProviderName = "sys_pip"
INSTALLER_BIN: BinName = "pip"
@ -25,7 +30,7 @@ class SystemPipBinProvider(PipProvider, BaseBinProvider):
# never modify system pip packages
return 'refusing to install packages globally with system pip, use a venv instead'
class SystemPipxBinProvider(PipProvider, BaseBinProvider):
class SystemPipxBinProvider(PipProvider):
name: BinProviderName = "pipx"
INSTALLER_BIN: BinName = "pipx"
@ -34,7 +39,7 @@ class SystemPipxBinProvider(PipProvider, BaseBinProvider):
IS_INSIDE_VENV = sys.prefix != sys.base_prefix
class VenvPipBinProvider(PipProvider, BaseBinProvider):
class VenvPipBinProvider(PipProvider):
name: BinProviderName = "venv_pip"
INSTALLER_BIN: BinName = "pip"
@ -45,18 +50,16 @@ class VenvPipBinProvider(PipProvider, BaseBinProvider):
return None
class LibPipBinProvider(PipProvider, BaseBinProvider):
class LibPipBinProvider(PipProvider):
name: BinProviderName = "lib_pip"
INSTALLER_BIN: BinName = "pip"
pip_venv: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'pip' / 'venv'
pip_venv: Optional[Path] = Path('/usr/local/share/abx/pip/venv')
def setup(self) -> None:
# update paths from config if they arent the default
from archivebox.config.common import STORAGE_CONFIG
if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
self.pip_venv = STORAGE_CONFIG.LIB_DIR / 'pip' / 'venv'
# update venv path to match most up-to-date LIB_DIR based on runtime config
LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR
self.pip_venv = LIB_DIR / 'pip' / 'venv'
super().setup()
SYS_PIP_BINPROVIDER = SystemPipBinProvider()

View file

@ -0,0 +1,22 @@
[project]
name = "abx-plugin-pip-binprovider"
version = "2024.10.24"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24",
"django>=5.0.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_plugin_pip_binprovider = "abx_plugin_pip_binprovider"

View file

@ -1,30 +1,18 @@
__package__ = 'plugins_pkg.playwright'
__label__ = 'playwright'
__version__ = '2024.10.14'
__package__ = 'abx_plugin_playwright_binprovider'
__id__ = 'playwright'
__label__ = 'Playwright'
__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/microsoft/playwright-python'
import abx
@abx.hookimpl
def get_PLUGIN():
return {
'playwright': {
'PACKAGE': __package__,
'LABEL': __label__,
'VERSION': __version__,
'AUTHOR': __author__,
'HOMEPAGE': __homepage__,
}
}
@abx.hookimpl
def get_CONFIG():
from .config import PLAYWRIGHT_CONFIG
return {
'playwright': PLAYWRIGHT_CONFIG
__id__: PLAYWRIGHT_CONFIG
}
@abx.hookimpl

View file

@ -1,20 +1,18 @@
__package__ = 'plugins_pkg.playwright'
__package__ = 'abx_plugin_playwright_binprovider'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinName, BinProvider
from pydantic_pkgr import BinName, BinProvider, Binary
from abx.archivebox.base_binary import BaseBinary, env
from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER
from abx_plugin_pip_binprovider.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
from abx_plugin_default_binproviders import env
from .config import PLAYWRIGHT_CONFIG
class PlaywrightBinary(BaseBinary):
class PlaywrightBinary(Binary):
name: BinName = PLAYWRIGHT_CONFIG.PLAYWRIGHT_BINARY
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env]

View file

@ -1,6 +1,7 @@
__package__ = 'plugins_pkg.playwright'
__package__ = 'abx_plugin_playwright_binprovider'
import os
import shutil
import platform
from pathlib import Path
from typing import List, Optional, Dict, ClassVar
@ -8,6 +9,7 @@ from typing import List, Optional, Dict, ClassVar
from pydantic import computed_field, Field
from pydantic_pkgr import (
BinName,
BinProvider,
BinProviderName,
BinProviderOverrides,
InstallArgs,
@ -18,11 +20,8 @@ from pydantic_pkgr import (
DEFAULT_ENV_PATH,
)
from archivebox.config import CONSTANTS
import abx
from abx.archivebox.base_binary import BaseBinProvider, env
from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER
from .binaries import PLAYWRIGHT_BINARY
@ -31,11 +30,11 @@ MACOS_PLAYWRIGHT_CACHE_DIR: Path = Path("~/Library/Caches/ms-playwright")
LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright")
class PlaywrightBinProvider(BaseBinProvider):
class PlaywrightBinProvider(BinProvider):
name: BinProviderName = "playwright"
INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
PATH: PATHStr = f"{Path('/usr/share/abx') / 'bin'}:{DEFAULT_ENV_PATH}"
playwright_browsers_dir: Path = (
MACOS_PLAYWRIGHT_CACHE_DIR.expanduser()
@ -59,12 +58,12 @@ class PlaywrightBinProvider(BaseBinProvider):
return None
def setup(self) -> None:
# update paths from config if they arent the default
from archivebox.config.common import STORAGE_CONFIG
if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
# update paths from config at runtime
LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR
assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"
self.PATH = f"{LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
assert shutil.which('pip'), "Pip bin provider not initialized"
if self.playwright_browsers_dir:
self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True)

View file

@ -1,7 +1,4 @@
__package__ = 'playwright'
from abx.archivebox.base_configset import BaseConfigSet
from abx_spec_config import BaseConfigSet
class PlaywrightConfigs(BaseConfigSet):
PLAYWRIGHT_BINARY: str = 'playwright'

View file

@ -0,0 +1,20 @@
[project]
name = "abx-plugin-playwright-binprovider"
version = "2024.10.24"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic>=2.4.2",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-config>=0.1.0",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_plugin_playwright_binprovider = "abx_plugin_playwright_binprovider"

View file

@ -0,0 +1,7 @@
[project]
name = "abx-pocket-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -42,7 +42,8 @@ class PuppeteerBinProvider(BaseBinProvider):
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
def setup(self) -> None:
# update paths from config
# update paths from config, don't do this lazily because we dont want to import archivebox.config.common at import-time
# we want to avoid depending on archivebox from abx code if at all possible
from archivebox.config.common import STORAGE_CONFIG
self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers'
self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin')

View file

@ -0,0 +1,7 @@
[project]
name = "abx-puppeteer-binprovider"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-readability-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

View file

@ -0,0 +1,7 @@
[project]
name = "abx-readwise-extractor"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = []

Some files were not shown because too many files have changed in this diff Show more