mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-25 05:30:23 +00:00
wip
This commit is contained in:
parent
4b6f08b0fe
commit
5d9a32c364
178 changed files with 2982 additions and 1322 deletions
|
@ -1,131 +0,0 @@
|
|||
__package__ = 'abx'
|
||||
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
from typing import Dict, Callable, List
|
||||
|
||||
from . import hookspec as base_spec
|
||||
from abx.hookspec import hookimpl, hookspec # noqa
|
||||
from abx.manager import pm, PluginManager # noqa
|
||||
|
||||
|
||||
pm.add_hookspecs(base_spec)
|
||||
|
||||
|
||||
###### PLUGIN DISCOVERY AND LOADING ########################################################
|
||||
|
||||
def get_plugin_order(plugin_entrypoint: Path) -> tuple[int, Path]:
    """
    Build the sort key that decides the load order of a plugin.

    An integer priority is read from a `.plugin_order` file located next to the
    plugin entrypoint. When that file is missing, or its contents are not a
    valid integer, the default priority of 999 is used so that one malformed
    file cannot abort discovery of every other plugin.

    Returns:
        A `(order, plugin_entrypoint)` tuple, usable as a `sorted(key=...)` value.
    """
    order = 999
    try:
        # if .plugin_order file exists, use it to set the load priority
        order = int((plugin_entrypoint.parent / '.plugin_order').read_text())
    except (FileNotFoundError, ValueError):
        # no file, or contents that cannot be parsed as an int: keep the default
        pass
    return (order, plugin_entrypoint)
|
||||
|
||||
def register_hookspecs(hookspecs: List[str]):
    """
    Import each module named in `hookspecs` and add it to the plugin manager
    as a source of hook specifications.
    """
    for module_path in hookspecs:
        pm.add_hookspecs(importlib.import_module(module_path))
|
||||
|
||||
|
||||
def find_plugins_in_dir(plugins_dir: Path, prefix: str) -> Dict[str, Path]:
    """
    Map "<prefix>.<dirname>" to the directory of every immediate subdirectory of
    `plugins_dir` that contains an __init__.py, in `get_plugin_order` order.
    A subdirectory named 'abx' is skipped.
    """
    entrypoints = sorted(plugins_dir.glob("*/__init__.py"), key=get_plugin_order)

    found: Dict[str, Path] = {}
    for entrypoint in entrypoints:
        plugin_dir = entrypoint.parent
        if plugin_dir.name == 'abx':
            continue
        found[f"{prefix}.{plugin_dir.name}"] = plugin_dir
    return found   # "plugins_pkg.pip": "/app/archivebox/plugins_pkg/pip"
|
||||
|
||||
|
||||
def get_pip_installed_plugins(group='abx'):
    """replaces pm.load_setuptools_entrypoints("abx"), finds plugins that registered entrypoints via pip"""
    import importlib.metadata

    detected = {}   # module_name: module_dir_path

    # snapshot the distributions up front, before any entrypoint gets loaded
    installed_dists = list(importlib.metadata.distributions())
    for dist in installed_dists:
        for ep in dist.entry_points:
            wanted = ep.group == group and not pm.is_blocked(ep.name)
            if wanted:
                # loading the entrypoint imports the plugin; its package dir is recorded
                detected[ep.name] = Path(ep.load().__file__).parent
    return detected
|
||||
|
||||
|
||||
def get_plugins_in_dirs(plugin_dirs: Dict[str, Path]):
    """
    Merge the results of `find_plugins_in_dir` for every (prefix, directory) pair
    in `plugin_dirs` into one flat {plugin_id: plugin_dir} mapping.
    Later directories overwrite earlier ones on a plugin_id clash.
    """
    merged = {}
    for prefix, directory in plugin_dirs.items():
        found_here = find_plugins_in_dir(directory, prefix=prefix)
        merged.update(found_here)
    return merged
|
||||
|
||||
|
||||
# Load all plugins from pip packages, archivebox built-ins, and user plugins
|
||||
|
||||
def load_plugins(plugins_dict: Dict[str, Path]):
    """
    Import every module named by the keys of `plugins_dict`, register it with the
    plugin manager, and return {module_name: module.PLUGIN}.

    The directory values are not used here; only the module names are imported.
    """
    loaded = {}
    for module_name, _plugin_dir in plugins_dict.items():
        module = importlib.import_module(module_name)
        pm.register(module)
        loaded[module_name] = module.PLUGIN
    return loaded
|
||||
|
||||
def get_registered_plugins():
    """
    Describe every plugin registered with Pluggy.

    Returns {name: {"name": ..., "hooks": [...], "version": ...}}; "version" is
    only present (and "name" is taken from the distribution) when the plugin
    manager has dist info for that plugin.
    """
    registered = {}
    distinfo_by_plugin = dict(pm.list_plugin_distinfo())

    for plugin in pm.get_plugins():
        info = {"name": plugin.__name__}
        info["hooks"] = [caller.name for caller in pm.get_hookcallers(plugin) or ()]

        distinfo = distinfo_by_plugin.get(plugin)
        if distinfo:
            info["version"] = distinfo.version
            info["name"] = getattr(distinfo, "name", None) or distinfo.project_name

        registered[info["name"]] = info

    return registered
|
||||
|
||||
|
||||
|
||||
|
||||
def get_plugin_hooks(plugin_pkg: str | None) -> Dict[str, Callable]:
|
||||
"""
|
||||
Get all the functions marked with @hookimpl on a module.
|
||||
"""
|
||||
if not plugin_pkg:
|
||||
return {}
|
||||
|
||||
hooks = {}
|
||||
|
||||
plugin_module = importlib.import_module(plugin_pkg)
|
||||
for attr_name in dir(plugin_module):
|
||||
if attr_name.startswith('_'):
|
||||
continue
|
||||
try:
|
||||
attr = getattr(plugin_module, attr_name)
|
||||
if isinstance(attr, Callable):
|
||||
hooks[attr_name] = None
|
||||
pm.parse_hookimpl_opts(plugin_module, attr_name)
|
||||
hooks[attr_name] = attr
|
||||
except Exception as e:
|
||||
print(f'Error getting hookimpls for {plugin_pkg}: {e}')
|
||||
|
||||
return hooks
|
|
@ -1,30 +0,0 @@
|
|||
__package__ = 'abx.archivebox'
|
||||
|
||||
import os
|
||||
import importlib
|
||||
|
||||
from typing import Dict
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def load_archivebox_plugins(pm, plugins_dict: Dict[str, Path]):
    """
    Register archivebox plugins with `pm`, walking `plugins_dict` in reverse order.

    For each plugin the package module itself is registered, and then its
    `.apps` submodule as well when a readable apps.py exists in the plugin dir.

    NOTE(review): the returned dict is never populated, so it is always empty.
    """
    LOADED_PLUGINS = {}

    for plugin_module, plugin_dir in reversed(plugins_dict.items()):
        # 1. register the plugin module directly in case it contains any loose hookimpls (e.g. in __init__.py)
        try:
            pm.register(importlib.import_module(plugin_module))
        except Exception as e:
            print(f'Error registering plugin: {plugin_module} - {e}')

        # 2. then try to import plugin_module.apps as well
        apps_file = plugin_dir / 'apps.py'
        if not os.access(apps_file, os.R_OK):
            continue
        plugin_apps = importlib.import_module(plugin_module + '.apps')
        pm.register(plugin_apps)   # register the whole .apps in case it contains loose hookimpls (not in a class)

    return LOADED_PLUGINS
|
|
@ -1,117 +0,0 @@
|
|||
__package__ = "abx.archivebox"
|
||||
|
||||
import os
|
||||
from typing import Optional, cast
|
||||
from typing_extensions import Self
|
||||
|
||||
from pydantic import validate_call
|
||||
from pydantic_pkgr import (
|
||||
Binary,
|
||||
BinProvider,
|
||||
BinProviderName,
|
||||
AptProvider,
|
||||
BrewProvider,
|
||||
EnvProvider,
|
||||
)
|
||||
|
||||
from archivebox.config.permissions import ARCHIVEBOX_USER
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
class BaseBinProvider(BinProvider):
    # Adds an admin URL and an abx hook implementation on top of pydantic_pkgr's BinProvider.
    # TODO: add install/load/load_or_install methods as abx.hookimpl methods

    @property
    def admin_url(self) -> str:
        """Admin page for binaries; currently the same URL for every provider."""
        # e.g. /admin/environment/binproviders/NpmBinProvider/   TODO
        url = "/admin/environment/binaries/"
        return url

    @abx.hookimpl
    def get_BINPROVIDERS(self):
        """Hook implementation: expose this provider as a one-element list."""
        # NOTE(review): other code in this package iterates hook results with .items(); confirm a list is intended
        return [self]
|
||||
|
||||
class BaseBinary(Binary):
    # Binary that symlinks itself into LIB_DIR/bin and can be loaded from the InstalledBinary db cache.
    # TODO: formalize state diagram, final states, transitions, side effects, etc.

    @staticmethod
    def symlink_to_lib(binary, bin_dir=None) -> None:
        """
        Best-effort: (re)create bin_dir/<binary.name> as a symlink to binary.abspath.

        Does nothing when the binary has no readable abspath; any failure while
        creating the symlink is ignored.
        """
        from archivebox.config.common import STORAGE_CONFIG

        if not bin_dir:
            bin_dir = STORAGE_CONFIG.LIB_DIR / 'bin'

        if not binary.abspath or not os.access(binary.abspath, os.R_OK):
            return

        try:
            bin_dir.mkdir(parents=True, exist_ok=True)
            link_path = bin_dir / binary.name
            link_path.unlink(missing_ok=True)
            link_path.symlink_to(binary.abspath)
            # make sure its executable by everyone
            # NOTE(review): Path.chmod follows symlinks by default, so this alters the target's mode
            link_path.chmod(0o777)
        except Exception:
            # not actually needed, we can just run without it
            pass

    @validate_call
    def load(self, fresh=False, **kwargs) -> Self:
        """
        Load the binary.

        With fresh=True it is detected via the parent class and symlinked into
        LIB_DIR/bin. Otherwise the cached db record is used, falling back to the
        parent class's load() when the db lookup raises.
        """
        from archivebox.config.common import STORAGE_CONFIG

        if fresh:
            loaded = super().load(**kwargs)
            self.symlink_to_lib(binary=loaded, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
            return cast(Self, loaded)

        # get cached binary from db
        try:
            from machine.models import InstalledBinary
            db_record = InstalledBinary.objects.get_from_db_or_cache(self)    # type: ignore
            loaded = InstalledBinary.load_from_db(db_record)
        except Exception:
            # maybe we are not in a DATA dir so there is no db, fallback to reading from fs
            # (e.g. when archivebox version is run outside of a DATA dir)
            loaded = super().load(**kwargs)
        return cast(Self, loaded)

    @validate_call
    def install(self, **kwargs) -> Self:
        """Install via the parent class, then symlink the result into LIB_DIR/bin."""
        from archivebox.config.common import STORAGE_CONFIG

        installed = super().install(**kwargs)
        self.symlink_to_lib(binary=installed, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
        return installed

    @validate_call
    def load_or_install(self, fresh=False, **kwargs) -> Self:
        """
        Return the loaded binary when load() yields one with a version,
        otherwise (or when loading raises) fall back to install(**kwargs).
        """
        from archivebox.config.common import STORAGE_CONFIG

        try:
            # note: **kwargs are only forwarded to install(), not to load()
            loaded = self.load(fresh=fresh)
            if loaded and loaded.version:
                self.symlink_to_lib(binary=loaded, bin_dir=STORAGE_CONFIG.LIB_DIR / 'bin')
                return loaded
        except Exception:
            pass

        return self.install(**kwargs)

    @property
    def admin_url(self) -> str:
        """Admin page for this binary, keyed by its name."""
        # e.g. /admin/environment/binaries/<name>/
        return f"/admin/environment/binaries/{self.name}/"

    @abx.hookimpl
    def get_BINARIES(self):
        """Hook implementation: expose this binary as a one-element list."""
        return [self]
|
||||
|
||||
|
||||
class AptBinProvider(AptProvider, BaseBinProvider):
    # AptProvider combined with the abx hooks / admin_url from BaseBinProvider
    name: BinProviderName = 'apt'
|
||||
|
||||
class BrewBinProvider(BrewProvider, BaseBinProvider):
    # BrewProvider combined with the abx hooks / admin_url from BaseBinProvider
    name: BinProviderName = 'brew'
|
||||
|
||||
class EnvBinProvider(EnvProvider, BaseBinProvider):
    # EnvProvider combined with the abx hooks / admin_url from BaseBinProvider
    name: BinProviderName = 'env'

    # euid defaults to the ARCHIVEBOX_USER value from archivebox.config.permissions
    euid: Optional[int] = ARCHIVEBOX_USER
|
||||
|
||||
apt = AptBinProvider()
|
||||
brew = BrewBinProvider()
|
||||
env = EnvBinProvider()
|
|
@ -1,204 +0,0 @@
|
|||
__package__ = 'abx.archivebox'
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from typing import Optional, List, Literal, Annotated, Dict, Any, Tuple
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import AfterValidator
|
||||
from pydantic_pkgr import BinName
|
||||
from django.utils.functional import cached_property
|
||||
from django.utils import timezone
|
||||
|
||||
import abx
|
||||
|
||||
from .base_binary import BaseBinary
|
||||
|
||||
|
||||
def assert_no_empty_args(args: List[str]) -> List[str]:
    """
    Validate that no argument in `args` is empty and return `args` unchanged.

    Raises:
        ValueError: if any argument has zero length. An explicit raise is used
            instead of `assert` so the check still runs under `python -O`.
    """
    if not all(len(arg) for arg in args):
        raise ValueError(f'command args must not contain empty values: {args!r}')
    return args
|
||||
|
||||
def _validate_extractor_name(value: str) -> str:
    """Return `value` unchanged if it is a valid Python identifier, else raise ValueError."""
    if not value.isidentifier():
        raise ValueError(f'extractor name must be a valid identifier: {value!r}')
    return value


def _validate_handler_func_str(value: str) -> str:
    """Return `value` unchanged if it starts with 'self.', else raise ValueError."""
    if not value.startswith('self.'):
        raise ValueError(f'handler func must start with "self.": {value!r}')
    return value


# An AfterValidator must return the validated value. The previous lambdas returned
# the bool result of the check, which replaced the string with True/False.
ExtractorName = Annotated[str, AfterValidator(_validate_extractor_name)]

HandlerFuncStr = Annotated[str, AfterValidator(_validate_handler_func_str)]
CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_empty_args)]
|
||||
|
||||
|
||||
class BaseExtractor:
    """
    Base class for an extractor: runs one binary against a snapshot URL and
    reports the process output together with machine/network/binary metadata.
    """

    name: ExtractorName
    binary: BinName

    # NOTE(review): these lists live on the class and are shared by all instances; do not mutate in place
    default_args: CmdArgsList = []
    extra_args: CmdArgsList = []

    def get_output_path(self, snapshot) -> Path:
        """Relative output path: the lowercased class name (`snapshot` is unused)."""
        return Path(self.__class__.__name__.lower())

    def should_extract(self, uri: str, config: dict | None=None) -> bool:
        """
        Return True when the extractor's binary can be detected with a version.

        Returns False when detection raises or reports no version. Previously a
        bare `raise` in the except branch made the `return False` fallback
        unreachable. `uri` and `config` are not used yet.
        """
        try:
            installed_version = self.detect_installed_binary().version
        except Exception:
            # could not load binary
            return False

        # output_dir = self.get_output_path(snapshot)
        # if output_dir.glob('*.*'):
        #     return False
        return bool(installed_version)

    @abx.hookimpl
    def extract(self, snapshot_id: str) -> Dict[str, Any]:
        """
        Run the extractor binary for the Snapshot with id `snapshot_id`.

        Returns {} when should_extract() is False. Otherwise returns a dict with
        the snapshot, machine, uplink and binary details, the command and its
        stdout/stderr/returncode, timestamps, status, errors and output info.
        Health counters on machine, uplink and binary are updated on the way.
        """
        from core.models import Snapshot
        from archivebox import CONSTANTS

        snapshot = Snapshot.objects.get(id=snapshot_id)

        if not self.should_extract(snapshot.url):
            return {}

        status = 'failed'
        start_ts = timezone.now()
        uplink = self.detect_network_interface()
        installed_binary = self.detect_installed_binary()
        machine = installed_binary.machine
        assert uplink.machine == installed_binary.machine  # it would be *very* weird if this wasn't true

        output_dir = CONSTANTS.DATA_DIR / '.tmp' / 'extractors' / self.name / str(snapshot.abid)
        output_dir.mkdir(parents=True, exist_ok=True)

        # execute the extractor binary with the given args
        # `args` is optional: this class does not define it, so use getattr to
        # avoid an AttributeError when a subclass has not set it
        explicit_args = getattr(self, 'args', None)
        if explicit_args is not None:
            args = [snapshot.url, *explicit_args]
        else:
            args = [snapshot.url, *self.default_args, *self.extra_args]
        cmd = [str(installed_binary.abspath), *args]
        proc = self.exec(installed_binary=installed_binary, args=args, cwd=output_dir)

        # collect the output
        end_ts = timezone.now()
        output_files = [str(path.relative_to(output_dir)) for path in output_dir.glob('**/*.*')]
        stdout = proc.stdout.strip()
        stderr = proc.stderr.strip()
        output_json = None
        output_text = stdout
        try:
            # stdout that parses as JSON is reported as output_json instead of output_text
            output_json = json.loads(stdout)
            output_text = None
        except json.JSONDecodeError:
            pass

        errors = []
        if proc.returncode == 0:
            status = 'success'
        else:
            errors.append(f'{installed_binary.name} returned non-zero exit code: {proc.returncode}')

        # increment health stats counters
        if status == 'success':
            machine.record_health_success()
            uplink.record_health_success()
            installed_binary.record_health_success()
        else:
            machine.record_health_failure()
            uplink.record_health_failure()
            installed_binary.record_health_failure()

        return {
            'extractor': self.name,

            'snapshot': {
                'id': snapshot.id,
                'abid': snapshot.abid,
                'url': snapshot.url,
                'created_by_id': snapshot.created_by_id,
            },

            'machine': {
                'id': machine.id,
                'abid': machine.abid,
                'guid': machine.guid,
                'hostname': machine.hostname,
                'hw_in_docker': machine.hw_in_docker,
                'hw_in_vm': machine.hw_in_vm,
                'hw_manufacturer': machine.hw_manufacturer,
                'hw_product': machine.hw_product,
                'hw_uuid': machine.hw_uuid,
                'os_arch': machine.os_arch,
                'os_family': machine.os_family,
                'os_platform': machine.os_platform,
                'os_release': machine.os_release,
                'os_kernel': machine.os_kernel,
            },

            'uplink': {
                'id': uplink.id,
                'abid': uplink.abid,
                'mac_address': uplink.mac_address,
                'ip_public': uplink.ip_public,
                'ip_local': uplink.ip_local,
                'dns_server': uplink.dns_server,
                'hostname': uplink.hostname,
                'iface': uplink.iface,
                'isp': uplink.isp,
                'city': uplink.city,
                'region': uplink.region,
                'country': uplink.country,
            },

            'binary': {
                'id': installed_binary.id,
                'abid': installed_binary.abid,
                'name': installed_binary.name,
                'binprovider': installed_binary.binprovider,
                'abspath': installed_binary.abspath,
                'version': installed_binary.version,
                'sha256': installed_binary.sha256,
            },

            'cmd': cmd,
            'stdout': stdout,
            'stderr': stderr,
            'returncode': proc.returncode,
            'start_ts': start_ts,
            'end_ts': end_ts,

            'status': status,
            'errors': errors,
            'output_dir': str(output_dir.relative_to(CONSTANTS.DATA_DIR)),
            'output_files': output_files,
            'output_json': output_json or {},
            'output_text': output_text or '',
        }

    # TODO: move this to a hookimpl
    def exec(self, args: CmdArgsList=(), cwd: Optional[Path]=None, installed_binary=None):
        """Run the loaded binary with `args` in `cwd` (defaults to the current working directory)."""
        cwd = cwd or Path(os.getcwd())
        binary = self.load_binary(installed_binary=installed_binary)

        return binary.exec(cmd=args, cwd=cwd)

    @cached_property
    def BINARY(self) -> BaseBinary:
        """
        The registered binary whose name equals `self.binary`.

        Raises:
            ValueError: if no registered binary has that name.
        """
        import abx.archivebox.reads
        for binary in abx.archivebox.reads.get_BINARIES().values():
            if binary.name == self.binary:
                return binary
        raise ValueError(f'Binary {self.binary} not found')

    def detect_installed_binary(self):
        """Look up the InstalledBinary record for this extractor's BINARY."""
        from machine.models import InstalledBinary
        # hydrates binary from DB/cache if record of installed version is recent enough
        # otherwise it finds it from scratch by detecting installed version/abspath/sha256 on host
        return InstalledBinary.objects.get_from_db_or_cache(self.BINARY)

    def load_binary(self, installed_binary=None) -> BaseBinary:
        """Load the binary from `installed_binary`, detecting that record first when not given."""
        installed_binary = installed_binary or self.detect_installed_binary()
        return installed_binary.load_from_db()

    def detect_network_interface(self):
        """Return NetworkInterface.objects.current()."""
        from machine.models import NetworkInterface
        return NetworkInterface.objects.current()

    @abx.hookimpl
    def get_EXTRACTORS(self):
        """Hook implementation: expose this extractor as a one-element list."""
        return [self]
|
|
@ -1,25 +0,0 @@
|
|||
__package__ = 'abx.archivebox'
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
class BaseReplayer:
    """Describes how to render an ArchiveResult in several contexts"""

    # URL pattern this replayer applies to
    url_pattern: str = '*'

    # template paths for the row, embed and full-page contexts
    row_template: str = 'plugins/generic_replayer/templates/row.html'
    embed_template: str = 'plugins/generic_replayer/templates/embed.html'
    fullpage_template: str = 'plugins/generic_replayer/templates/fullpage.html'

    # row_view: LazyImportStr = 'plugins.generic_replayer.views.row_view'
    # embed_view: LazyImportStr = 'plugins.generic_replayer.views.embed_view'
    # fullpage_view: LazyImportStr = 'plugins.generic_replayer.views.fullpage_view'
    # icon_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'
    # thumbnail_view: LazyImportStr = 'plugins.generic_replayer.views.get_icon'

    @abx.hookimpl
    def get_REPLAYERS(self):
        """Hook implementation: expose this replayer as a one-element list."""
        replayers = [self]
        return replayers

    # TODO: add hookimpl methods for get_row_template, get_embed_template, get_fullpage_template, etc...
|
|
@ -1,52 +0,0 @@
|
|||
__package__ = 'abx.archivebox'
|
||||
|
||||
from typing import Dict, Any
|
||||
|
||||
from .. import hookspec
|
||||
|
||||
from .base_binary import BaseBinary, BaseBinProvider
|
||||
from .base_configset import BaseConfigSet
|
||||
from .base_extractor import BaseExtractor
|
||||
from .base_searchbackend import BaseSearchBackend
|
||||
|
||||
|
||||
@hookspec
def get_PLUGIN() -> Dict[str, Dict[str, Any]]:
    """Hookspec: annotated to return a dict of plugin info dicts; the spec body returns an empty dict."""
    return dict()
|
||||
|
||||
@hookspec
def get_CONFIG() -> Dict[str, BaseConfigSet]:
    """Hookspec: annotated to return a dict of BaseConfigSet values; the spec body returns an empty dict."""
    return dict()
|
||||
|
||||
|
||||
|
||||
@hookspec
def get_EXTRACTORS() -> Dict[str, BaseExtractor]:
    """Hookspec: annotated to return a dict of BaseExtractor values; the spec body returns an empty dict."""
    return dict()
|
||||
|
||||
@hookspec
def get_SEARCHBACKENDS() -> Dict[str, BaseSearchBackend]:
    """Hookspec: annotated to return a dict of BaseSearchBackend values; the spec body returns an empty dict."""
    return dict()
|
||||
|
||||
# @hookspec
|
||||
# def get_REPLAYERS() -> Dict[str, BaseReplayer]:
|
||||
# return {}
|
||||
|
||||
# @hookspec
|
||||
# def get_ADMINDATAVIEWS():
|
||||
# return {}
|
||||
|
||||
# @hookspec
|
||||
# def get_QUEUES():
|
||||
# return {}
|
||||
|
||||
|
||||
##############################################################
|
||||
# provided by abx.pydantic_pkgr.hookspec:
|
||||
# @hookspec
|
||||
# def get_BINARIES() -> Dict[str, BaseBinary]:
|
||||
# return {}
|
||||
|
||||
# @hookspec
|
||||
# def get_BINPROVIDERS() -> Dict[str, BaseBinProvider]:
|
||||
# return {}
|
|
@ -1,160 +0,0 @@
|
|||
__package__ = 'abx.archivebox'
|
||||
|
||||
import importlib
|
||||
from typing import Dict, Set, Any, TYPE_CHECKING
|
||||
|
||||
from benedict import benedict
|
||||
|
||||
import abx
|
||||
from .. import pm
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .base_configset import BaseConfigSet
|
||||
from .base_binary import BaseBinary, BaseBinProvider
|
||||
from .base_extractor import BaseExtractor
|
||||
from .base_searchbackend import BaseSearchBackend
|
||||
# from .base_replayer import BaseReplayer
|
||||
# from .base_queue import BaseQueue
|
||||
# from .base_admindataview import BaseAdminDataView
|
||||
|
||||
# API exposed to ArchiveBox code
|
||||
|
||||
def get_PLUGINS() -> Dict[str, Dict[str, Any]]:
    """Merge the dicts returned by every get_PLUGIN hook into one benedict keyed by plugin id."""
    merged = {}
    for plugin_dict in pm.hook.get_PLUGIN():
        for plugin_id, plugin in plugin_dict.items():
            merged[plugin_id] = plugin
    return benedict(merged)
|
||||
|
||||
def get_PLUGIN(plugin_id: str) -> Dict[str, Any]:
    """
    Return a benedict describing one plugin.

    When the plugin is unknown or declares no package, a minimal
    {'id': plugin_id, 'hooks': {}} benedict is returned. Both branches return a
    benedict so callers can treat the result the same way (the fallback used to
    be a plain dict).

    Otherwise the plugin's package is imported and the result combines defaults
    read from the module (`__label__`, `__version__`, `__author__`,
    `__homepage__`, `__dependencies__`) with the values from the plugin's own
    info dict, which take precedence.
    """
    plugin_info = get_PLUGINS().get(plugin_id, {})
    package = plugin_info.get('package', plugin_info.get('PACKAGE', None))
    if not package:
        return benedict({'id': plugin_id, 'hooks': {}})

    module = importlib.import_module(package)
    hooks = abx.get_plugin_hooks(module.__package__)
    assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks)

    return benedict({
        'id': plugin_id,
        'label': getattr(module, '__label__', plugin_id),
        'module': module,
        'package': module.__package__,
        'hooks': hooks,
        'version': getattr(module, '__version__', '999.999.999'),
        'author': getattr(module, '__author__', 'Unknown'),
        'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'),
        'dependencies': getattr(module, '__dependencies__', []),
        'source_code': module.__file__,
        # values declared by the plugin itself override the defaults above
        **plugin_info,
    })
|
||||
|
||||
|
||||
def get_HOOKS() -> Set[str]:
    """
    Return the set of hook names found across all plugins.

    Hooks are read with key access rather than attribute access, because
    get_PLUGIN() may return a plain dict, which has no `.hooks` attribute.
    """
    return {
        hook_name
        for plugin_id in get_PLUGINS().keys()
        for hook_name in get_PLUGIN(plugin_id)['hooks']
    }
|
||||
|
||||
def get_CONFIGS() -> benedict:    # Dict[str, 'BaseConfigSet']
    """Merge the dicts returned by every get_CONFIG hook into one benedict keyed by config id."""
    merged = {}
    for plugin_configs in pm.hook.get_CONFIG():
        for config_id, configset in plugin_configs.items():
            merged[config_id] = configset
    return benedict(merged)
|
||||
|
||||
|
||||
def get_FLAT_CONFIG() -> Dict[str, Any]:
    """Flatten the model_dump() of every configset from get_CONFIGS() into one benedict; later keys win."""
    flat = {}
    for configset in get_CONFIGS().values():
        for key, value in configset.model_dump().items():
            flat[key] = value
    return benedict(flat)
|
||||
|
||||
def get_BINPROVIDERS() -> Dict[str, 'BaseBinProvider']:
    """
    Return a benedict of binproviders: the builtin env/apt/brew providers first,
    then everything returned by the get_BINPROVIDERS hooks (which may override them).
    """
    # TODO: move these to plugins
    from abx.archivebox.base_binary import apt, brew, env

    sources = [
        {'env': env, 'apt': apt, 'brew': brew},
        *pm.hook.get_BINPROVIDERS(),
    ]

    merged = {}
    for plugin_binproviders in sources:
        for binprovider_id, binprovider in plugin_binproviders.items():
            merged[binprovider_id] = binprovider
    return benedict(merged)
|
||||
|
||||
def get_BINARIES() -> Dict[str, 'BaseBinary']:
    """Merge the dicts returned by every get_BINARIES hook into one benedict keyed by binary id."""
    merged = {}
    for plugin_binaries in pm.hook.get_BINARIES():
        for binary_id, binary in plugin_binaries.items():
            merged[binary_id] = binary
    return benedict(merged)
|
||||
|
||||
def get_EXTRACTORS() -> Dict[str, 'BaseExtractor']:
    """Merge the dicts returned by every get_EXTRACTORS hook into one benedict keyed by extractor id."""
    merged = {}
    for plugin_extractors in pm.hook.get_EXTRACTORS():
        for extractor_id, extractor in plugin_extractors.items():
            merged[extractor_id] = extractor
    return benedict(merged)
|
||||
|
||||
# def get_REPLAYERS() -> Dict[str, 'BaseReplayer']:
|
||||
# return benedict({
|
||||
# replayer.id: replayer
|
||||
# for plugin_replayers in pm.hook.get_REPLAYERS()
|
||||
# for replayer in plugin_replayers
|
||||
# })
|
||||
|
||||
# def get_ADMINDATAVIEWS() -> Dict[str, 'BaseAdminDataView']:
|
||||
# return benedict({
|
||||
# admin_dataview.id: admin_dataview
|
||||
# for plugin_admin_dataviews in pm.hook.get_ADMINDATAVIEWS()
|
||||
# for admin_dataview in plugin_admin_dataviews
|
||||
# })
|
||||
|
||||
# def get_QUEUES() -> Dict[str, 'BaseQueue']:
|
||||
# return benedict({
|
||||
# queue.id: queue
|
||||
# for plugin_queues in pm.hook.get_QUEUES()
|
||||
# for queue in plugin_queues
|
||||
# })
|
||||
|
||||
def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
    """Merge the dicts returned by every get_SEARCHBACKENDS hook into one benedict keyed by backend id."""
    merged = {}
    for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS():
        for searchbackend_id, searchbackend in plugin_searchbackends.items():
            merged[searchbackend_id] = searchbackend
    return benedict(merged)
|
||||
|
||||
|
||||
|
||||
def get_scope_config(defaults: benedict | None = None, persona=None, seed=None, crawl=None, snapshot=None, archiveresult=None, extra_config=None):
    """
    Get all the relevant config for the given scope, in correct precedence order.

    Missing scope objects are derived from the more specific ones
    (archiveresult -> snapshot -> crawl -> seed / persona). The layers are merged
    into a plain dict, lowest precedence first: defaults, persona, seed, crawl,
    snapshot, archiveresult, extra_config.
    """
    from django.conf import settings
    default_config: benedict = defaults or settings.CONFIG

    # fill in any scope object that was not passed explicitly
    snapshot = snapshot or (archiveresult and archiveresult.snapshot)
    crawl = crawl or (snapshot and snapshot.crawl)
    seed = seed or (crawl and crawl.seed)
    persona = persona or (crawl and crawl.persona)

    # ordered from lowest to highest precedence
    layers = [
        default_config,                                      # defaults / config file / environment variables
        persona.config if persona else {},
        seed.config if seed else {},
        crawl.config if crawl else {},
        snapshot.config if snapshot else {},
        archiveresult.config if archiveresult else {},
        extra_config or {},                                  # highest precedence
    ]

    merged = {}
    for layer in layers:
        merged = {**merged, **layer}
    return merged
|
|
@ -1 +0,0 @@
|
|||
__package__ = 'abx.django'
|
|
@ -1,101 +0,0 @@
|
|||
__package__ = 'abx.django'
|
||||
|
||||
import itertools
|
||||
# from benedict import benedict
|
||||
|
||||
from .. import pm
|
||||
|
||||
|
||||
def get_INSTALLED_APPS():
    """Chain the get_INSTALLED_APPS hook results together, in reversed result order."""
    per_plugin = pm.hook.get_INSTALLED_APPS()
    return itertools.chain(*reversed(per_plugin))
|
||||
|
||||
# def register_INSTALLLED_APPS(INSTALLED_APPS):
|
||||
# pm.hook.register_INSTALLED_APPS(INSTALLED_APPS=INSTALLED_APPS)
|
||||
|
||||
|
||||
def get_MIDDLEWARES():
    """Chain the get_MIDDLEWARE hook results together, in reversed result order."""
    per_plugin = pm.hook.get_MIDDLEWARE()
    return itertools.chain(*reversed(per_plugin))
|
||||
|
||||
# def register_MIDDLEWARES(MIDDLEWARE):
|
||||
# pm.hook.register_MIDDLEWARE(MIDDLEWARE=MIDDLEWARE)
|
||||
|
||||
|
||||
def get_AUTHENTICATION_BACKENDS():
    """Chain the get_AUTHENTICATION_BACKENDS hook results together, in reversed result order."""
    per_plugin = pm.hook.get_AUTHENTICATION_BACKENDS()
    return itertools.chain(*reversed(per_plugin))
|
||||
|
||||
# def register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS):
|
||||
# pm.hook.register_AUTHENTICATION_BACKENDS(AUTHENTICATION_BACKENDS=AUTHENTICATION_BACKENDS)
|
||||
|
||||
|
||||
def get_STATICFILES_DIRS():
    """Chain the get_STATICFILES_DIRS hook results together, in reversed result order."""
    per_plugin = pm.hook.get_STATICFILES_DIRS()
    return itertools.chain(*reversed(per_plugin))
|
||||
|
||||
# def register_STATICFILES_DIRS(STATICFILES_DIRS):
|
||||
# pm.hook.register_STATICFILES_DIRS(STATICFILES_DIRS=STATICFILES_DIRS)
|
||||
|
||||
|
||||
def get_TEMPLATE_DIRS():
    """Chain the get_TEMPLATE_DIRS hook results together, in reversed result order."""
    per_plugin = pm.hook.get_TEMPLATE_DIRS()
    return itertools.chain(*reversed(per_plugin))
|
||||
|
||||
# def register_TEMPLATE_DIRS(TEMPLATE_DIRS):
|
||||
# pm.hook.register_TEMPLATE_DIRS(TEMPLATE_DIRS=TEMPLATE_DIRS)
|
||||
|
||||
def get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME='queue.sqlite3'):
    """Merge the dicts returned by every get_DJANGO_HUEY_QUEUES hook into one dict; later results win."""
    queues = {}
    plugin_results = pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=QUEUE_DATABASE_NAME)
    for queue_defs in plugin_results:
        queues.update(queue_defs)
    return queues
|
||||
|
||||
# def register_DJANGO_HUEY(DJANGO_HUEY):
|
||||
# pm.hook.register_DJANGO_HUEY(DJANGO_HUEY=DJANGO_HUEY)
|
||||
|
||||
def get_ADMIN_DATA_VIEWS_URLS():
    """Chain the get_ADMIN_DATA_VIEWS_URLS hook results together, in reversed result order."""
    per_plugin = pm.hook.get_ADMIN_DATA_VIEWS_URLS()
    return itertools.chain(*reversed(per_plugin))
|
||||
|
||||
# def register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS):
|
||||
# pm.hook.register_ADMIN_DATA_VIEWS(ADMIN_DATA_VIEWS=ADMIN_DATA_VIEWS)
|
||||
|
||||
|
||||
# def register_settings(settings):
|
||||
# # convert settings dict to an benedict so we can set values using settings.attr = xyz notation
|
||||
# settings_as_obj = benedict(settings, keypath_separator=None)
|
||||
|
||||
# # set default values for settings that are used by plugins
|
||||
# # settings_as_obj.INSTALLED_APPS = settings_as_obj.get('INSTALLED_APPS', [])
|
||||
# # settings_as_obj.MIDDLEWARE = settings_as_obj.get('MIDDLEWARE', [])
|
||||
# # settings_as_obj.AUTHENTICATION_BACKENDS = settings_as_obj.get('AUTHENTICATION_BACKENDS', [])
|
||||
# # settings_as_obj.STATICFILES_DIRS = settings_as_obj.get('STATICFILES_DIRS', [])
|
||||
# # settings_as_obj.TEMPLATE_DIRS = settings_as_obj.get('TEMPLATE_DIRS', [])
|
||||
# # settings_as_obj.DJANGO_HUEY = settings_as_obj.get('DJANGO_HUEY', {'queues': {}})
|
||||
# # settings_as_obj.ADMIN_DATA_VIEWS = settings_as_obj.get('ADMIN_DATA_VIEWS', {'URLS': []})
|
||||
|
||||
# # # call all the hook functions to mutate the settings values in-place
|
||||
# # register_INSTALLLED_APPS(settings_as_obj.INSTALLED_APPS)
|
||||
# # register_MIDDLEWARES(settings_as_obj.MIDDLEWARE)
|
||||
# # register_AUTHENTICATION_BACKENDS(settings_as_obj.AUTHENTICATION_BACKENDS)
|
||||
# # register_STATICFILES_DIRS(settings_as_obj.STATICFILES_DIRS)
|
||||
# # register_TEMPLATE_DIRS(settings_as_obj.TEMPLATE_DIRS)
|
||||
# # register_DJANGO_HUEY(settings_as_obj.DJANGO_HUEY)
|
||||
# # register_ADMIN_DATA_VIEWS(settings_as_obj.ADMIN_DATA_VIEWS)
|
||||
|
||||
# # calls Plugin.settings(settings) on each registered plugin
|
||||
# pm.hook.register_settings(settings=settings_as_obj)
|
||||
|
||||
# # then finally update the settings globals() object will all the new settings
|
||||
# # settings.update(settings_as_obj)
|
||||
|
||||
|
||||
def get_urlpatterns():
    """Flatten the results of the `urlpatterns` hook into a single list."""
    per_plugin = pm.hook.urlpatterns()
    return list(itertools.chain(*per_plugin))
|
||||
|
||||
def register_urlpatterns(urlpatterns):
    """Call the register_urlpatterns hook, passing `urlpatterns` to every implementation."""
    hook = pm.hook.register_urlpatterns
    hook(urlpatterns=urlpatterns)
|
||||
|
||||
|
||||
def register_checks():
    """register any django system checks"""
    hook = pm.hook.register_checks
    hook()
|
||||
|
||||
def register_admin(admin_site):
    """register any django admin models/views with the main django admin site instance"""
    hook = pm.hook.register_admin
    hook(admin_site=admin_site)
|
|
@ -1,22 +0,0 @@
|
|||
from pathlib import Path
|
||||
|
||||
from pluggy import HookimplMarker
|
||||
from pluggy import HookspecMarker
|
||||
|
||||
spec = hookspec = HookspecMarker("abx")
|
||||
impl = hookimpl = HookimplMarker("abx")
|
||||
|
||||
|
||||
@hookspec
@hookimpl
def get_system_user() -> str:
    """Return the last path component of the expanded home directory ('~')."""
    # Beware $HOME may not match current EUID, UID, PUID, SUID, there are edge cases
    # - sudo (EUID != UID != SUID)
    # - running with an autodetected UID based on data dir ownership
    #   but mapping of UID:username is broken because it was created
    #   by a different host system, e.g. 911's $HOME outside of docker
    #   might be /usr/lib/lxd instead of /home/archivebox
    # - running as a user that doesn't have a home directory
    # - home directory is set to a path that doesn't exist, or is inside a dir we can't read
    home_dir = Path('~').expanduser()
    return home_dir.name
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
import inspect
|
||||
|
||||
import pluggy
|
||||
|
||||
|
||||
class PluginManager(pluggy.PluginManager):
    """
    Patch to fix pluggy's PluginManager to work with pydantic models.
    See: https://github.com/pytest-dev/pluggy/pull/536
    """

    def parse_hookimpl_opts(self, plugin, name: str) -> pluggy.HookimplOpts | None:
        """
        Return the hookimpl options for `plugin.<name>`, or None when the
        attribute is a property or a pydantic model field.

        Falls back to parsing on `type(plugin)` when parsing on the plugin
        itself raises AttributeError.
        """
        # IMPORTANT: @property methods can have side effects, and are never hookimpl
        # if attr is a property, skip it in advance
        owner = plugin if inspect.isclass(plugin) else type(plugin)
        class_attr = getattr(owner, name, None)
        if isinstance(class_attr, property):
            return None

        # pydantic model fields are like attrs and also can never be hookimpls
        if hasattr(plugin, "__pydantic_core_schema__") and name in getattr(plugin, "model_fields", {}):
            # pydantic models mess with the class and attr __signature__
            # so inspect.isroutine(...) throws exceptions and cant be used
            return None

        try:
            opts = super().parse_hookimpl_opts(plugin, name)
        except AttributeError:
            opts = super().parse_hookimpl_opts(type(plugin), name)
        return opts
|
||||
|
||||
pm = PluginManager("abx")
|
|
@ -1 +0,0 @@
|
|||
__package__ = 'abx.pydantic_pkgr'
|
|
@ -1,13 +0,0 @@
|
|||
|
||||
from ..hookspec import hookspec
|
||||
|
||||
###########################################################################################
|
||||
|
||||
@hookspec
def get_BINPROVIDERS():
    # Hook specification stub; the body here simply returns an empty dict.
    # NOTE(review): plugins presumably implement this to return a mapping of
    # binprovider name -> BinProvider instance -- confirm against the hookimpls.
    return {}
|
||||
|
||||
@hookspec
def get_BINARIES():
    # Hook specification stub; the body here simply returns an empty dict.
    # NOTE(review): plugins presumably implement this to return a mapping of
    # binary name -> Binary instance -- confirm against the hookimpls.
    return {}
|
||||
|
|
@ -9,9 +9,6 @@ from pathlib import Path
|
|||
from django.utils.crypto import get_random_string
|
||||
|
||||
import abx
|
||||
import abx.archivebox
|
||||
import abx.archivebox.reads
|
||||
import abx.django.use
|
||||
|
||||
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
|
||||
from archivebox.config.common import SHELL_CONFIG, SERVER_CONFIG # noqa
|
||||
|
@ -26,43 +23,22 @@ IS_GETTING_VERSION_OR_HELP = 'version' in sys.argv or 'help' in sys.argv or '--v
|
|||
################################################################################
|
||||
|
||||
PLUGIN_HOOKSPECS = [
|
||||
'abx.django.hookspec',
|
||||
'abx.pydantic_pkgr.hookspec',
|
||||
'abx.archivebox.hookspec',
|
||||
'abx_spec_django',
|
||||
'abx_spec_pydantic_pkgr',
|
||||
'abx_spec_config',
|
||||
'abx_spec_archivebox',
|
||||
]
|
||||
abx.register_hookspecs(PLUGIN_HOOKSPECS)
|
||||
|
||||
BUILTIN_PLUGIN_DIRS = {
|
||||
'archivebox': PACKAGE_DIR,
|
||||
'plugins_pkg': PACKAGE_DIR / 'plugins_pkg',
|
||||
'plugins_auth': PACKAGE_DIR / 'plugins_auth',
|
||||
'plugins_search': PACKAGE_DIR / 'plugins_search',
|
||||
'plugins_extractor': PACKAGE_DIR / 'plugins_extractor',
|
||||
}
|
||||
USER_PLUGIN_DIRS = {
|
||||
# 'user_plugins': DATA_DIR / 'user_plugins',
|
||||
}
|
||||
SYSTEM_PLUGINS = abx.get_pip_installed_plugins(group='abx')
|
||||
USER_PLUGINS = abx.find_plugins_in_dir(DATA_DIR / 'user_plugins')
|
||||
|
||||
# Discover ArchiveBox plugins
|
||||
BUILTIN_PLUGINS = abx.get_plugins_in_dirs(BUILTIN_PLUGIN_DIRS)
|
||||
PIP_PLUGINS = abx.get_pip_installed_plugins(group='archivebox')
|
||||
USER_PLUGINS = abx.get_plugins_in_dirs(USER_PLUGIN_DIRS)
|
||||
ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
|
||||
ALL_PLUGINS = {**SYSTEM_PLUGINS, **USER_PLUGINS}
|
||||
|
||||
# Load ArchiveBox plugins
|
||||
PLUGIN_MANAGER = abx.pm
|
||||
abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
|
||||
PLUGINS = abx.archivebox.reads.get_PLUGINS()
|
||||
abx.load_plugins(ALL_PLUGINS)
|
||||
|
||||
# Load ArchiveBox config from plugins
|
||||
CONFIGS = abx.archivebox.reads.get_CONFIGS()
|
||||
CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
|
||||
BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
|
||||
BINARIES = abx.archivebox.reads.get_BINARIES()
|
||||
EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
|
||||
SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
|
||||
# REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
|
||||
# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
|
||||
# # Load ArchiveBox config from plugins
|
||||
|
||||
|
||||
################################################################################
|
||||
|
@ -110,7 +86,7 @@ INSTALLED_APPS = [
|
|||
'api', # Django-Ninja-based Rest API interfaces, config, APIToken model, etc.
|
||||
|
||||
# ArchiveBox plugins
|
||||
*abx.django.use.get_INSTALLED_APPS(), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
|
||||
*abx.as_list(abx.pm.hook.get_INSTALLED_APPS()), # all plugin django-apps found in archivebox/plugins_* and data/user_plugins,
|
||||
|
||||
# 3rd-party apps from PyPI that need to be loaded last
|
||||
'admin_data_views', # handles rendering some convenient automatic read-only views of data in Django admin
|
||||
|
@ -135,7 +111,7 @@ MIDDLEWARE = [
|
|||
'core.middleware.ReverseProxyAuthMiddleware',
|
||||
'django.contrib.messages.middleware.MessageMiddleware',
|
||||
'core.middleware.CacheControlMiddleware',
|
||||
*abx.django.use.get_MIDDLEWARES(),
|
||||
*abx.as_list(abx.pm.hook.get_MIDDLEWARES()),
|
||||
]
|
||||
|
||||
|
||||
|
@ -148,7 +124,7 @@ MIDDLEWARE = [
|
|||
AUTHENTICATION_BACKENDS = [
|
||||
'django.contrib.auth.backends.RemoteUserBackend',
|
||||
'django.contrib.auth.backends.ModelBackend',
|
||||
*abx.django.use.get_AUTHENTICATION_BACKENDS(),
|
||||
*abx.as_list(abx.pm.hook.get_AUTHENTICATION_BACKENDS()),
|
||||
]
|
||||
|
||||
|
||||
|
@ -169,7 +145,7 @@ AUTHENTICATION_BACKENDS = [
|
|||
|
||||
STATIC_URL = '/static/'
|
||||
TEMPLATES_DIR_NAME = 'templates'
|
||||
CUSTOM_TEMPLATES_ENABLED = os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK) and CONSTANTS.CUSTOM_TEMPLATES_DIR.is_dir()
|
||||
CUSTOM_TEMPLATES_ENABLED = os.path.isdir(CONSTANTS.CUSTOM_TEMPLATES_DIR) and os.access(CONSTANTS.CUSTOM_TEMPLATES_DIR, os.R_OK)
|
||||
STATICFILES_DIRS = [
|
||||
*([str(CONSTANTS.CUSTOM_TEMPLATES_DIR / 'static')] if CUSTOM_TEMPLATES_ENABLED else []),
|
||||
# *[
|
||||
|
@ -177,7 +153,7 @@ STATICFILES_DIRS = [
|
|||
# for plugin_dir in PLUGIN_DIRS.values()
|
||||
# if (plugin_dir / 'static').is_dir()
|
||||
# ],
|
||||
*abx.django.use.get_STATICFILES_DIRS(),
|
||||
*abx.as_list(abx.pm.hook.get_STATICFILES_DIRS()),
|
||||
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'static'),
|
||||
]
|
||||
|
||||
|
@ -188,7 +164,7 @@ TEMPLATE_DIRS = [
|
|||
# for plugin_dir in PLUGIN_DIRS.values()
|
||||
# if (plugin_dir / 'templates').is_dir()
|
||||
# ],
|
||||
*abx.django.use.get_TEMPLATE_DIRS(),
|
||||
*abx.as_list(abx.pm.hook.get_TEMPLATE_DIRS()),
|
||||
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'core'),
|
||||
str(PACKAGE_DIR / TEMPLATES_DIR_NAME / 'admin'),
|
||||
str(PACKAGE_DIR / TEMPLATES_DIR_NAME),
|
||||
|
@ -292,7 +268,7 @@ if not IS_GETTING_VERSION_OR_HELP: # dont create queue.sqlite3 file
|
|||
"queues": {
|
||||
HUEY["name"]: HUEY.copy(),
|
||||
# more registered here at plugin import-time by BaseQueue.register()
|
||||
**abx.django.use.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME),
|
||||
**abx.as_dict(abx.pm.hook.get_DJANGO_HUEY_QUEUES(QUEUE_DATABASE_NAME=CONSTANTS.QUEUE_DATABASE_FILENAME)),
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -517,7 +493,7 @@ ADMIN_DATA_VIEWS = {
|
|||
"name": "log",
|
||||
},
|
||||
},
|
||||
*abx.django.use.get_ADMIN_DATA_VIEWS_URLS(),
|
||||
*abx.as_list(abx.pm.hook.get_ADMIN_DATA_VIEWS_URLS()),
|
||||
],
|
||||
}
|
||||
|
||||
|
@ -611,7 +587,4 @@ if DEBUG_REQUESTS_TRACKER:
|
|||
# JET_TOKEN = 'some-api-token-here'
|
||||
|
||||
|
||||
abx.django.use.register_checks()
|
||||
# abx.archivebox.reads.register_all_hooks(globals())
|
||||
|
||||
# import ipdb; ipdb.set_trace()
|
||||
|
|
|
@ -1,42 +0,0 @@
|
|||
__package__ = 'plugins_pkg.npm'
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
|
||||
|
||||
from archivebox.config import DATA_DIR, CONSTANTS
|
||||
|
||||
from abx.archivebox.base_binary import BaseBinProvider
|
||||
|
||||
|
||||
|
||||
OLD_NODE_BIN_PATH = DATA_DIR / 'node_modules' / '.bin'
|
||||
NEW_NODE_BIN_PATH = CONSTANTS.DEFAULT_LIB_DIR / 'npm' / 'node_modules' / '.bin'
|
||||
|
||||
|
||||
class SystemNpmBinProvider(NpmProvider, BaseBinProvider):
    # Provider identified by the name "sys_npm".
    name: BinProviderName = "sys_npm"

    # No explicit npm prefix is configured for this provider.
    # NOTE(review): presumably NpmProvider then falls back to the system-wide
    # npm install location -- confirm in pydantic_pkgr.
    npm_prefix: Optional[Path] = None
|
||||
|
||||
|
||||
class LibNpmBinProvider(NpmProvider, BaseBinProvider):
    # Provider identified by the name "lib_npm".
    name: BinProviderName = "lib_npm"
    # Default search path: NEW_NODE_BIN_PATH (under the default lib dir) first,
    # then OLD_NODE_BIN_PATH (DATA_DIR/node_modules/.bin).
    PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'

    # Default npm prefix: the "npm" folder inside the default lib dir.
    npm_prefix: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'npm'

    def setup(self) -> None:
        # Update paths from config if they aren't the default.
        # STORAGE_CONFIG is imported here, at call time, rather than at module import time.
        from archivebox.config.common import STORAGE_CONFIG
        if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
            self.npm_prefix = STORAGE_CONFIG.LIB_DIR / 'npm'
            # the configured lib dir's bin folder goes first; both default paths stay as fallbacks
            self.PATH = f'{STORAGE_CONFIG.LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'

        super().setup()
|
||||
|
||||
|
||||
SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
|
||||
LIB_NPM_BINPROVIDER = LibNpmBinProvider()
|
||||
npm = LIB_NPM_BINPROVIDER
|
4
archivebox/vendor/__init__.py
vendored
4
archivebox/vendor/__init__.py
vendored
|
@ -8,8 +8,8 @@ VENDORED_LIBS = {
|
|||
# sys.path dir: library name
|
||||
#'python-atomicwrites': 'atomicwrites',
|
||||
#'django-taggit': 'taggit',
|
||||
'pydantic-pkgr': 'pydantic_pkgr',
|
||||
'pocket': 'pocket',
|
||||
# 'pydantic-pkgr': 'pydantic_pkgr',
|
||||
# 'pocket': 'pocket',
|
||||
#'base32-crockford': 'base32_crockford',
|
||||
}
|
||||
|
||||
|
|
1
archivebox/vendor/pocket
vendored
1
archivebox/vendor/pocket
vendored
|
@ -1 +0,0 @@
|
|||
Subproject commit e7970b63feafc8941c325111c5ce3706698a18b5
|
1
archivebox/vendor/pydantic-pkgr
vendored
1
archivebox/vendor/pydantic-pkgr
vendored
|
@ -1 +0,0 @@
|
|||
Subproject commit a774f24644ee14f14fa2cc3d8e6e0a585ae00fdd
|
32
click_test.py
Normal file
32
click_test.py
Normal file
|
@ -0,0 +1,32 @@
|
|||
import sys
|
||||
import click
|
||||
from rich import print
|
||||
from archivebox.config.django import setup_django
|
||||
|
||||
setup_django()
|
||||
|
||||
import abx.archivebox.writes
|
||||
|
||||
|
||||
def parse_stdin_to_args(io=sys.stdin):
    """Yield every whitespace-separated token (URL or snapshot ID) read from *io*.

    Args:
        io: a readable text stream; it is read to EOF in a single ``read()`` call.

    Yields:
        Each non-empty token, in the order it appears in the stream.

    Tokens may be separated by any run of whitespace (spaces, tabs, newlines,
    carriage returns), so tab-separated or CRLF-terminated input is split the
    same way as space/newline-separated input instead of producing fused
    arguments like ``"url1\\turl2"``.
    """
    # str.split() with no separator splits on any whitespace run and never
    # produces empty strings, so no extra strip()/emptiness filtering is needed.
    yield from io.read().split()
|
||||
|
||||
|
||||
# Gather data from stdin in case using a pipe
|
||||
if not sys.stdin.isatty():
|
||||
sys.argv += parse_stdin_to_args(sys.stdin)
|
||||
|
||||
|
||||
@click.command()
@click.argument("snapshot_ids_or_urls", type=str, nargs=-1)
def extract(snapshot_ids_or_urls):
    # CLI entry point: run the extract hook on every URL / snapshot ID given
    # on the command line, printing each result as it is produced.
    # (Documented with comments rather than a docstring so the generated
    # --help text stays unchanged.)
    for target in snapshot_ids_or_urls:
        # progress goes to stderr so stdout only carries the results
        print('- EXTRACTING', target, file=sys.stderr)
        extraction_results = abx.archivebox.writes.extract(target)
        for outcome in extraction_results:
            print(outcome)
|
||||
|
||||
if __name__ == "__main__":
|
||||
extract()
|
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-archivedotorg-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
|
@ -13,15 +13,15 @@ from pydantic_pkgr import (
|
|||
bin_abspath,
|
||||
)
|
||||
|
||||
import abx.archivebox.reads
|
||||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||
|
||||
# Depends on Other Plugins:
|
||||
from archivebox.config.common import SHELL_CONFIG
|
||||
from plugins_pkg.puppeteer.binproviders import PUPPETEER_BINPROVIDER
|
||||
from plugins_pkg.playwright.binproviders import PLAYWRIGHT_BINPROVIDER
|
||||
from abx_puppeteer_binprovider.binproviders import PUPPETEER_BINPROVIDER
|
||||
from abx_playwright_binprovider.binproviders import PLAYWRIGHT_BINPROVIDER
|
||||
|
||||
|
||||
from .config import CHROME_CONFIG
|
||||
|
||||
CHROMIUM_BINARY_NAMES_LINUX = [
|
||||
"chromium",
|
||||
"chromium-browser",
|
||||
|
@ -48,12 +48,13 @@ CHROME_BINARY_NAMES_MACOS = [
|
|||
]
|
||||
CHROME_BINARY_NAMES = CHROME_BINARY_NAMES_LINUX + CHROME_BINARY_NAMES_MACOS
|
||||
|
||||
APT_DEPENDENCIES = [
|
||||
'apt-transport-https', 'at-spi2-common', 'chromium-browser',
|
||||
CHROME_APT_DEPENDENCIES = [
|
||||
'apt-transport-https', 'at-spi2-common',
|
||||
'fontconfig', 'fonts-freefont-ttf', 'fonts-ipafont-gothic', 'fonts-kacst', 'fonts-khmeros', 'fonts-liberation', 'fonts-noto', 'fonts-noto-color-emoji', 'fonts-symbola', 'fonts-thai-tlwg', 'fonts-tlwg-loma-otf', 'fonts-unifont', 'fonts-wqy-zenhei',
|
||||
'libasound2', 'libatk-bridge2.0-0', 'libatk1.0-0', 'libatspi2.0-0', 'libavahi-client3', 'libavahi-common-data', 'libavahi-common3', 'libcairo2', 'libcups2',
|
||||
'libdbus-1-3', 'libdrm2', 'libfontenc1', 'libgbm1', 'libglib2.0-0', 'libice6', 'libnspr4', 'libnss3', 'libsm6', 'libunwind8', 'libx11-6', 'libxaw7', 'libxcb1',
|
||||
'libxcomposite1', 'libxdamage1', 'libxext6', 'libxfixes3', 'libxfont2', 'libxkbcommon0', 'libxkbfile1', 'libxmu6', 'libxpm4', 'libxrandr2', 'libxt6', 'x11-utils', 'x11-xkb-utils', 'xfonts-encodings',
|
||||
'chromium-browser',
|
||||
]
|
||||
|
||||
|
||||
|
@ -95,7 +96,7 @@ class ChromeBinary(BaseBinary):
|
|||
'packages': ['chromium'], # playwright install chromium
|
||||
},
|
||||
apt.name: {
|
||||
'packages': APT_DEPENDENCIES,
|
||||
'packages': CHROME_APT_DEPENDENCIES,
|
||||
},
|
||||
brew.name: {
|
||||
'packages': ['--cask', 'chromium'] if platform.system().lower() == 'darwin' else [],
|
||||
|
@ -104,10 +105,9 @@ class ChromeBinary(BaseBinary):
|
|||
|
||||
@staticmethod
|
||||
def symlink_to_lib(binary, bin_dir=None) -> None:
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
bin_dir = bin_dir or STORAGE_CONFIG.LIB_DIR / 'bin'
|
||||
bin_dir = bin_dir or abx.archivebox.reads.get_CONFIGS().STORAGE_CONFIG.LIB_DIR / 'bin'
|
||||
|
||||
if not (binary.abspath and os.access(binary.abspath, os.F_OK)):
|
||||
if not (binary.abspath and os.path.isfile(binary.abspath)):
|
||||
return
|
||||
|
||||
bin_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
@ -121,7 +121,7 @@ class ChromeBinary(BaseBinary):
|
|||
# otherwise on linux we can symlink directly to binary executable
|
||||
symlink.unlink(missing_ok=True)
|
||||
symlink.symlink_to(binary.abspath)
|
||||
except Exception as err:
|
||||
except Exception:
|
||||
# print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
|
||||
# not actually needed, we can just run without it
|
||||
pass
|
||||
|
@ -132,14 +132,17 @@ class ChromeBinary(BaseBinary):
|
|||
Cleans up any state or runtime files that chrome leaves behind when killed by
|
||||
a timeout or other error
|
||||
"""
|
||||
lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
|
||||
|
||||
if SHELL_CONFIG.IN_DOCKER and os.access(lock_file, os.F_OK):
|
||||
lock_file.unlink()
|
||||
try:
|
||||
linux_lock_file = Path("~/.config/chromium/SingletonLock").expanduser()
|
||||
linux_lock_file.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if CHROME_CONFIG.CHROME_USER_DATA_DIR:
|
||||
if os.access(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock', os.F_OK):
|
||||
lock_file.unlink()
|
||||
try:
|
||||
(CHROME_CONFIG.CHROME_USER_DATA_DIR / 'SingletonLock').unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
|
7
packages/abx-plugin-chrome-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-chrome-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-chrome-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
7
packages/abx-plugin-curl-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-curl-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-curl-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
|
@ -0,0 +1,24 @@
|
|||
|
||||
import abx
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from pydantic_pkgr import (
|
||||
AptProvider,
|
||||
BrewProvider,
|
||||
EnvProvider,
|
||||
BinProvider,
|
||||
)
|
||||
apt = APT_BINPROVIDER = AptProvider()
|
||||
brew = BREW_BINPROVIDER = BrewProvider()
|
||||
env = ENV_BINPROVIDER = EnvProvider()
|
||||
|
||||
|
||||
@abx.hookimpl(tryfirst=True)
def get_BINPROVIDERS() -> Dict[str, BinProvider]:
    """Return the module-level apt, brew, and env binprovider instances keyed by name."""
    default_binproviders: Dict[str, BinProvider] = {}
    default_binproviders['apt'] = APT_BINPROVIDER
    default_binproviders['brew'] = BREW_BINPROVIDER
    default_binproviders['env'] = ENV_BINPROVIDER
    return default_binproviders
|
18
packages/abx-plugin-default-binproviders/pyproject.toml
Normal file
18
packages/abx-plugin-default-binproviders/pyproject.toml
Normal file
|
@ -0,0 +1,18 @@
|
|||
[project]
|
||||
name = "abx-plugin-default-binproviders"
|
||||
version = "2024.10.24"
|
||||
description = "Default BinProviders for ABX (apt, brew, env)"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"abx>=0.1.0",
|
||||
"pydantic-pkgr>=0.5.4",
|
||||
"abx-spec-pydantic-pkgr>=0.1.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project.entry-points.abx]
|
||||
abx_plugin_default_binproviders = "abx_plugin_default_binproviders"
|
0
packages/abx-plugin-favicon-extractor/README.md
Normal file
0
packages/abx-plugin-favicon-extractor/README.md
Normal file
7
packages/abx-plugin-favicon-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-favicon-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-favicon-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
0
packages/abx-plugin-git-extractor/README.md
Normal file
0
packages/abx-plugin-git-extractor/README.md
Normal file
7
packages/abx-plugin-git-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-git-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-git-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
0
packages/abx-plugin-htmltotext-extractor/README.md
Normal file
0
packages/abx-plugin-htmltotext-extractor/README.md
Normal file
7
packages/abx-plugin-htmltotext-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-htmltotext-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-htmltotext-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
0
packages/abx-plugin-ldap-auth/README.md
Normal file
0
packages/abx-plugin-ldap-auth/README.md
Normal file
22
packages/abx-plugin-ldap-auth/pyproject.toml
Normal file
22
packages/abx-plugin-ldap-auth/pyproject.toml
Normal file
|
@ -0,0 +1,22 @@
|
|||
[project]
|
||||
name = "abx-ldap-auth"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
||||
|
||||
|
||||
[project.entry-points.abx]
|
||||
ldap = "abx_ldap_auth"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.hatch.build.targets.sdist]
|
||||
packages = ["."]
|
||||
|
||||
[tool.hatch.build.targets.wheel]
|
||||
packages = ["."]
|
0
packages/abx-plugin-mercury-extractor/README.md
Normal file
0
packages/abx-plugin-mercury-extractor/README.md
Normal file
7
packages/abx-plugin-mercury-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-mercury-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-mercury-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
0
packages/abx-plugin-npm-binprovider/README.md
Normal file
0
packages/abx-plugin-npm-binprovider/README.md
Normal file
|
@ -1,26 +1,12 @@
|
|||
__package__ = 'plugins_pkg.npm'
|
||||
__version__ = '2024.10.14'
|
||||
__package__ = 'abx_plugin_npm_binprovider'
|
||||
__id__ = 'npm'
|
||||
__label__ = 'npm'
|
||||
__label__ = 'NPM'
|
||||
__author__ = 'ArchiveBox'
|
||||
__homepage__ = 'https://www.npmjs.com/'
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
@abx.hookimpl
|
||||
def get_PLUGIN():
|
||||
return {
|
||||
__id__: {
|
||||
'id': __id__,
|
||||
'package': __package__,
|
||||
'label': __label__,
|
||||
'version': __version__,
|
||||
'author': __author__,
|
||||
'homepage': __homepage__,
|
||||
}
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
||||
def get_CONFIG():
|
||||
from .config import NPM_CONFIG
|
|
@ -4,14 +4,19 @@ __package__ = 'plugins_pkg.npm'
|
|||
from typing import List
|
||||
|
||||
from pydantic import InstanceOf
|
||||
from benedict import benedict
|
||||
|
||||
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides
|
||||
from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides
|
||||
|
||||
from abx_plugin_default_binproviders import get_BINPROVIDERS
|
||||
|
||||
DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS())
|
||||
env = DEFAULT_BINPROVIDERS.env
|
||||
apt = DEFAULT_BINPROVIDERS.apt
|
||||
brew = DEFAULT_BINPROVIDERS.brew
|
||||
|
||||
|
||||
from abx.archivebox.base_binary import BaseBinary, env, apt, brew
|
||||
|
||||
|
||||
class NodeBinary(BaseBinary):
|
||||
class NodeBinary(Binary):
|
||||
name: BinName = 'node'
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
|
||||
|
||||
|
@ -23,7 +28,7 @@ class NodeBinary(BaseBinary):
|
|||
NODE_BINARY = NodeBinary()
|
||||
|
||||
|
||||
class NpmBinary(BaseBinary):
|
||||
class NpmBinary(Binary):
|
||||
name: BinName = 'npm'
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
|
||||
|
||||
|
@ -35,7 +40,7 @@ class NpmBinary(BaseBinary):
|
|||
NPM_BINARY = NpmBinary()
|
||||
|
||||
|
||||
class NpxBinary(BaseBinary):
|
||||
class NpxBinary(Binary):
|
||||
name: BinName = 'npx'
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [apt, brew, env]
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
|
||||
|
||||
import abx
|
||||
|
||||
DEFAULT_LIB_NPM_DIR = Path('/usr/local/share/abx/npm')
|
||||
|
||||
OLD_NODE_BIN_PATH = Path(os.getcwd()) / 'node_modules' / '.bin'
|
||||
NEW_NODE_BIN_PATH = DEFAULT_LIB_NPM_DIR / 'node_modules' / '.bin'
|
||||
|
||||
|
||||
class SystemNpmBinProvider(NpmProvider):
    # Provider identified by the name "sys_npm".
    name: BinProviderName = "sys_npm"

    # No explicit npm prefix is configured for this provider.
    # NOTE(review): presumably NpmProvider then falls back to the system-wide
    # npm install location -- confirm in pydantic_pkgr.
    npm_prefix: Optional[Path] = None
|
||||
|
||||
|
||||
class LibNpmBinProvider(NpmProvider):
    # Provider identified by the name "lib_npm".
    name: BinProviderName = "lib_npm"
    # Default search path: NEW_NODE_BIN_PATH (under DEFAULT_LIB_NPM_DIR) first,
    # then OLD_NODE_BIN_PATH (node_modules/.bin under the import-time working dir).
    PATH: PATHStr = f'{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'

    # Default npm prefix, replaced in setup() with the configured LIB_DIR.
    npm_prefix: Optional[Path] = DEFAULT_LIB_NPM_DIR

    def setup(self) -> None:
        """Point npm_prefix and PATH at the configured LIB_DIR, then run the parent setup."""
        # Update paths from config at runtime.
        # LIB_DIR is read from the flattened config, the same way
        # LibPipBinProvider.setup() does. The per-plugin get_CONFIG() hook
        # implementations return {plugin_id: config} mappings, which have no
        # top-level LIB_DIR attribute.
        LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR

        self.npm_prefix = LIB_DIR / 'npm'
        # the configured lib dir's bin folder goes first; both default paths stay as fallbacks
        self.PATH = f'{LIB_DIR / "npm" / "node_modules" / ".bin"}:{NEW_NODE_BIN_PATH}:{OLD_NODE_BIN_PATH}'

        super().setup()
|
||||
|
||||
|
||||
SYS_NPM_BINPROVIDER = SystemNpmBinProvider()
|
||||
LIB_NPM_BINPROVIDER = LibNpmBinProvider()
|
||||
npm = LIB_NPM_BINPROVIDER
|
|
@ -1,7 +1,4 @@
|
|||
__package__ = 'plugins_pkg.npm'
|
||||
|
||||
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
from abx_spec_config import BaseConfigSet
|
||||
|
||||
|
||||
###################### Config ##########################
|
20
packages/abx-plugin-npm-binprovider/pyproject.toml
Normal file
20
packages/abx-plugin-npm-binprovider/pyproject.toml
Normal file
|
@ -0,0 +1,20 @@
|
|||
[project]
|
||||
name = "abx-plugin-npm-binprovider"
|
||||
version = "2024.10.24"
|
||||
description = "NPM binary provider plugin for ABX"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"abx>=0.1.0",
|
||||
"pydantic-pkgr>=0.5.4",
|
||||
"abx-spec-pydantic-pkgr>=0.1.0",
|
||||
"abx-spec-config>=0.1.0",
|
||||
"abx-plugin-default-binproviders>=2024.10.24",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project.entry-points.abx]
|
||||
abx_plugin_npm_binprovider = "abx_plugin_npm_binprovider"
|
0
packages/abx-plugin-pip-binprovider/README.md
Normal file
0
packages/abx-plugin-pip-binprovider/README.md
Normal file
|
@ -1,33 +1,19 @@
|
|||
__package__ = 'plugins_pkg.pip'
|
||||
__label__ = 'pip'
|
||||
__version__ = '2024.10.14'
|
||||
__author__ = 'ArchiveBox'
|
||||
__homepage__ = 'https://github.com/pypa/pip'
|
||||
__package__ = 'abx_plugin_pip_binprovider'
|
||||
__id__ = 'pip'
|
||||
__label__ = 'PIP'
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
@abx.hookimpl
|
||||
def get_PLUGIN():
|
||||
return {
|
||||
'pip': {
|
||||
'PACKAGE': __package__,
|
||||
'LABEL': __label__,
|
||||
'VERSION': __version__,
|
||||
'AUTHOR': __author__,
|
||||
'HOMEPAGE': __homepage__,
|
||||
}
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
||||
def get_CONFIG():
|
||||
from .config import PIP_CONFIG
|
||||
|
||||
return {
|
||||
'pip': PIP_CONFIG
|
||||
__id__: PIP_CONFIG
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
||||
@abx.hookimpl(tryfirst=True)
|
||||
def get_BINARIES():
|
||||
from .binaries import ARCHIVEBOX_BINARY, PYTHON_BINARY, DJANGO_BINARY, SQLITE_BINARY, PIP_BINARY, PIPX_BINARY
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
__package__ = 'plugins_pkg.pip'
|
||||
__package__ = 'abx_plugin_pip_binprovider'
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
@ -9,29 +9,30 @@ from pydantic import InstanceOf, Field, model_validator
|
|||
import django
|
||||
import django.db.backends.sqlite3.base
|
||||
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
|
||||
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, SemVer
|
||||
from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer
|
||||
|
||||
from archivebox import VERSION
|
||||
|
||||
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider, env, apt, brew
|
||||
|
||||
from archivebox.misc.logging import hint
|
||||
|
||||
from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
|
||||
from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew
|
||||
|
||||
###################### Config ##########################
|
||||
|
||||
def get_archivebox_version():
    """Return archivebox.VERSION, or None if importing it fails for any reason.

    The import happens at call time, so this module can be loaded even when
    the archivebox package is unavailable or fails to import.
    """
    try:
        from archivebox import VERSION
    except Exception:
        # deliberately broad: any import-time failure just means "version unknown"
        return None
    else:
        return VERSION
|
||||
|
||||
|
||||
class ArchiveboxBinary(BaseBinary):
|
||||
class ArchiveboxBinary(Binary):
|
||||
name: BinName = 'archivebox'
|
||||
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
|
||||
overrides: BinaryOverrides = {
|
||||
VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION},
|
||||
SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': VERSION},
|
||||
apt.name: {'packages': [], 'version': VERSION},
|
||||
brew.name: {'packages': [], 'version': VERSION},
|
||||
VENV_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version},
|
||||
SYS_PIP_BINPROVIDER.name: {'packages': [], 'version': get_archivebox_version},
|
||||
apt.name: {'packages': [], 'version': get_archivebox_version},
|
||||
brew.name: {'packages': [], 'version': get_archivebox_version},
|
||||
}
|
||||
|
||||
# @validate_call
|
||||
|
@ -45,7 +46,7 @@ class ArchiveboxBinary(BaseBinary):
|
|||
ARCHIVEBOX_BINARY = ArchiveboxBinary()
|
||||
|
||||
|
||||
class PythonBinary(BaseBinary):
|
||||
class PythonBinary(Binary):
|
||||
name: BinName = 'python'
|
||||
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
|
||||
|
@ -71,9 +72,9 @@ LOADED_SQLITE_PATH = Path(django.db.backends.sqlite3.base.__file__)
|
|||
LOADED_SQLITE_VERSION = SemVer(django_sqlite3.version)
|
||||
LOADED_SQLITE_FROM_VENV = str(LOADED_SQLITE_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))
|
||||
|
||||
class SqliteBinary(BaseBinary):
|
||||
class SqliteBinary(Binary):
|
||||
name: BinName = 'sqlite'
|
||||
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
|
||||
overrides: BinaryOverrides = {
|
||||
VENV_PIP_BINPROVIDER.name: {
|
||||
"abspath": LOADED_SQLITE_PATH if LOADED_SQLITE_FROM_VENV else None,
|
||||
|
@ -93,10 +94,10 @@ class SqliteBinary(BaseBinary):
|
|||
cursor.execute('SELECT JSON(\'{"a": "b"}\')')
|
||||
except django_sqlite3.OperationalError as exc:
|
||||
print(f'[red][X] Your SQLite3 version is missing the required JSON1 extension: {exc}[/red]')
|
||||
hint([
|
||||
'Upgrade your Python version or install the extension manually:',
|
||||
'https://code.djangoproject.com/wiki/JSON1Extension'
|
||||
])
|
||||
print(
|
||||
'[violet]Hint:[/violet] Upgrade your Python version or install the extension manually:\n' +
|
||||
' https://code.djangoproject.com/wiki/JSON1Extension\n'
|
||||
)
|
||||
return self
|
||||
|
||||
# @validate_call
|
||||
|
@ -114,10 +115,10 @@ LOADED_DJANGO_PATH = Path(django.__file__)
|
|||
LOADED_DJANGO_VERSION = SemVer(django.VERSION[:3])
|
||||
LOADED_DJANGO_FROM_VENV = str(LOADED_DJANGO_PATH.absolute().resolve()).startswith(str(VENV_PIP_BINPROVIDER.pip_venv and VENV_PIP_BINPROVIDER.pip_venv.absolute().resolve()))
|
||||
|
||||
class DjangoBinary(BaseBinary):
|
||||
class DjangoBinary(Binary):
|
||||
name: BinName = 'django'
|
||||
|
||||
binproviders_supported: List[InstanceOf[BaseBinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = Field(default=[VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER])
|
||||
overrides: BinaryOverrides = {
|
||||
VENV_PIP_BINPROVIDER.name: {
|
||||
"abspath": LOADED_DJANGO_PATH if LOADED_DJANGO_FROM_VENV else None,
|
||||
|
@ -139,7 +140,7 @@ class DjangoBinary(BaseBinary):
|
|||
|
||||
DJANGO_BINARY = DjangoBinary()
|
||||
|
||||
class PipBinary(BaseBinary):
|
||||
class PipBinary(Binary):
|
||||
name: BinName = "pip"
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
|
||||
|
||||
|
@ -154,7 +155,7 @@ class PipBinary(BaseBinary):
|
|||
PIP_BINARY = PipBinary()
|
||||
|
||||
|
||||
class PipxBinary(BaseBinary):
|
||||
class PipxBinary(Binary):
|
||||
name: BinName = "pipx"
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, apt, brew, env]
|
||||
|
|
@ -1,21 +1,26 @@
|
|||
__package__ = 'plugins_pkg.pip'
|
||||
|
||||
import os
|
||||
import sys
|
||||
import site
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from benedict import benedict
|
||||
|
||||
from pydantic_pkgr import PipProvider, BinName, BinProviderName
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
import abx
|
||||
|
||||
from abx.archivebox.base_binary import BaseBinProvider
|
||||
from abx_plugin_default_binproviders import get_BINPROVIDERS
|
||||
|
||||
DEFAULT_BINPROVIDERS = benedict(get_BINPROVIDERS())
|
||||
env = DEFAULT_BINPROVIDERS.env
|
||||
apt = DEFAULT_BINPROVIDERS.apt
|
||||
brew = DEFAULT_BINPROVIDERS.brew
|
||||
|
||||
|
||||
###################### Config ##########################
|
||||
|
||||
class SystemPipBinProvider(PipProvider, BaseBinProvider):
|
||||
class SystemPipBinProvider(PipProvider):
|
||||
name: BinProviderName = "sys_pip"
|
||||
INSTALLER_BIN: BinName = "pip"
|
||||
|
||||
|
@ -25,7 +30,7 @@ class SystemPipBinProvider(PipProvider, BaseBinProvider):
|
|||
# never modify system pip packages
|
||||
return 'refusing to install packages globally with system pip, use a venv instead'
|
||||
|
||||
class SystemPipxBinProvider(PipProvider, BaseBinProvider):
|
||||
class SystemPipxBinProvider(PipProvider):
|
||||
name: BinProviderName = "pipx"
|
||||
INSTALLER_BIN: BinName = "pipx"
|
||||
|
||||
|
@ -34,7 +39,7 @@ class SystemPipxBinProvider(PipProvider, BaseBinProvider):
|
|||
|
||||
IS_INSIDE_VENV = sys.prefix != sys.base_prefix
|
||||
|
||||
class VenvPipBinProvider(PipProvider, BaseBinProvider):
|
||||
class VenvPipBinProvider(PipProvider):
|
||||
name: BinProviderName = "venv_pip"
|
||||
INSTALLER_BIN: BinName = "pip"
|
||||
|
||||
|
@ -45,18 +50,16 @@ class VenvPipBinProvider(PipProvider, BaseBinProvider):
|
|||
return None
|
||||
|
||||
|
||||
class LibPipBinProvider(PipProvider, BaseBinProvider):
|
||||
class LibPipBinProvider(PipProvider):
|
||||
name: BinProviderName = "lib_pip"
|
||||
INSTALLER_BIN: BinName = "pip"
|
||||
|
||||
pip_venv: Optional[Path] = CONSTANTS.DEFAULT_LIB_DIR / 'pip' / 'venv'
|
||||
pip_venv: Optional[Path] = Path('/usr/local/share/abx/pip/venv')
|
||||
|
||||
def setup(self) -> None:
|
||||
# update paths from config if they aren't the default
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
|
||||
self.pip_venv = STORAGE_CONFIG.LIB_DIR / 'pip' / 'venv'
|
||||
|
||||
# update venv path to match most up-to-date LIB_DIR based on runtime config
|
||||
LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR
|
||||
self.pip_venv = LIB_DIR / 'pip' / 'venv'
|
||||
super().setup()
|
||||
|
||||
SYS_PIP_BINPROVIDER = SystemPipBinProvider()
|
22
packages/abx-plugin-pip-binprovider/pyproject.toml
Normal file
22
packages/abx-plugin-pip-binprovider/pyproject.toml
Normal file
|
@ -0,0 +1,22 @@
|
|||
[project]
|
||||
name = "abx-plugin-pip-binprovider"
|
||||
version = "2024.10.24"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"abx>=0.1.0",
|
||||
"pydantic-pkgr>=0.5.4",
|
||||
"abx-spec-config>=0.1.0",
|
||||
"abx-spec-pydantic-pkgr>=0.1.0",
|
||||
"abx-plugin-default-binproviders>=2024.10.24",
|
||||
"django>=5.0.0",
|
||||
]
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project.entry-points.abx]
|
||||
abx_plugin_pip_binprovider = "abx_plugin_pip_binprovider"
|
0
packages/abx-plugin-playwright-binprovider/README.md
Normal file
0
packages/abx-plugin-playwright-binprovider/README.md
Normal file
|
@ -1,30 +1,18 @@
|
|||
__package__ = 'plugins_pkg.playwright'
|
||||
__label__ = 'playwright'
|
||||
__version__ = '2024.10.14'
|
||||
__package__ = 'abx_plugin_playwright_binprovider'
|
||||
__id__ = 'playwright'
|
||||
__label__ = 'Playwright'
|
||||
__author__ = 'ArchiveBox'
|
||||
__homepage__ = 'https://github.com/microsoft/playwright-python'
|
||||
|
||||
import abx
|
||||
|
||||
|
||||
@abx.hookimpl
|
||||
def get_PLUGIN():
|
||||
return {
|
||||
'playwright': {
|
||||
'PACKAGE': __package__,
|
||||
'LABEL': __label__,
|
||||
'VERSION': __version__,
|
||||
'AUTHOR': __author__,
|
||||
'HOMEPAGE': __homepage__,
|
||||
}
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
||||
def get_CONFIG():
|
||||
from .config import PLAYWRIGHT_CONFIG
|
||||
|
||||
return {
|
||||
'playwright': PLAYWRIGHT_CONFIG
|
||||
__id__: PLAYWRIGHT_CONFIG
|
||||
}
|
||||
|
||||
@abx.hookimpl
|
|
@ -1,20 +1,18 @@
|
|||
__package__ = 'plugins_pkg.playwright'
|
||||
__package__ = 'abx_plugin_playwright_binprovider'
|
||||
|
||||
from typing import List
|
||||
|
||||
from pydantic import InstanceOf
|
||||
from pydantic_pkgr import BinName, BinProvider
|
||||
from pydantic_pkgr import BinName, BinProvider, Binary
|
||||
|
||||
from abx.archivebox.base_binary import BaseBinary, env
|
||||
|
||||
from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER
|
||||
from abx_plugin_pip_binprovider.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER
|
||||
from abx_plugin_default_binproviders import env
|
||||
|
||||
from .config import PLAYWRIGHT_CONFIG
|
||||
|
||||
|
||||
|
||||
|
||||
class PlaywrightBinary(BaseBinary):
|
||||
class PlaywrightBinary(Binary):
|
||||
name: BinName = PLAYWRIGHT_CONFIG.PLAYWRIGHT_BINARY
|
||||
|
||||
binproviders_supported: List[InstanceOf[BinProvider]] = [LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env]
|
|
@ -1,6 +1,7 @@
|
|||
__package__ = 'plugins_pkg.playwright'
|
||||
__package__ = 'abx_plugin_playwright_binprovider'
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import platform
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, ClassVar
|
||||
|
@ -8,6 +9,7 @@ from typing import List, Optional, Dict, ClassVar
|
|||
from pydantic import computed_field, Field
|
||||
from pydantic_pkgr import (
|
||||
BinName,
|
||||
BinProvider,
|
||||
BinProviderName,
|
||||
BinProviderOverrides,
|
||||
InstallArgs,
|
||||
|
@ -18,11 +20,8 @@ from pydantic_pkgr import (
|
|||
DEFAULT_ENV_PATH,
|
||||
)
|
||||
|
||||
from archivebox.config import CONSTANTS
|
||||
import abx
|
||||
|
||||
from abx.archivebox.base_binary import BaseBinProvider, env
|
||||
|
||||
from plugins_pkg.pip.binproviders import SYS_PIP_BINPROVIDER
|
||||
|
||||
from .binaries import PLAYWRIGHT_BINARY
|
||||
|
||||
|
@ -31,11 +30,11 @@ MACOS_PLAYWRIGHT_CACHE_DIR: Path = Path("~/Library/Caches/ms-playwright")
|
|||
LINUX_PLAYWRIGHT_CACHE_DIR: Path = Path("~/.cache/ms-playwright")
|
||||
|
||||
|
||||
class PlaywrightBinProvider(BaseBinProvider):
|
||||
class PlaywrightBinProvider(BinProvider):
|
||||
name: BinProviderName = "playwright"
|
||||
INSTALLER_BIN: BinName = PLAYWRIGHT_BINARY.name
|
||||
|
||||
PATH: PATHStr = f"{CONSTANTS.DEFAULT_LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
|
||||
PATH: PATHStr = f"{Path('/usr/share/abx') / 'bin'}:{DEFAULT_ENV_PATH}"
|
||||
|
||||
playwright_browsers_dir: Path = (
|
||||
MACOS_PLAYWRIGHT_CACHE_DIR.expanduser()
|
||||
|
@ -59,12 +58,12 @@ class PlaywrightBinProvider(BaseBinProvider):
|
|||
return None
|
||||
|
||||
def setup(self) -> None:
|
||||
# update paths from config if they aren't the default
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
if STORAGE_CONFIG.LIB_DIR != CONSTANTS.DEFAULT_LIB_DIR:
|
||||
self.PATH = f"{STORAGE_CONFIG.LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
|
||||
# update paths from config at runtime
|
||||
LIB_DIR = abx.pm.hook.get_FLAT_CONFIG().LIB_DIR
|
||||
|
||||
assert SYS_PIP_BINPROVIDER.INSTALLER_BIN_ABSPATH, "Pip bin provider not initialized"
|
||||
self.PATH = f"{LIB_DIR / 'bin'}:{DEFAULT_ENV_PATH}"
|
||||
|
||||
assert shutil.which('pip'), "Pip bin provider not initialized"
|
||||
|
||||
if self.playwright_browsers_dir:
|
||||
self.playwright_browsers_dir.mkdir(parents=True, exist_ok=True)
|
|
@ -1,7 +1,4 @@
|
|||
__package__ = 'playwright'
|
||||
|
||||
from abx.archivebox.base_configset import BaseConfigSet
|
||||
|
||||
from abx_spec_config import BaseConfigSet
|
||||
|
||||
class PlaywrightConfigs(BaseConfigSet):
|
||||
PLAYWRIGHT_BINARY: str = 'playwright'
|
20
packages/abx-plugin-playwright-binprovider/pyproject.toml
Normal file
20
packages/abx-plugin-playwright-binprovider/pyproject.toml
Normal file
|
@ -0,0 +1,20 @@
|
|||
[project]
|
||||
name = "abx-plugin-playwright-binprovider"
|
||||
version = "2024.10.24"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = [
|
||||
"abx>=0.1.0",
|
||||
"pydantic>=2.4.2",
|
||||
"pydantic-pkgr>=0.5.4",
|
||||
"abx-spec-pydantic-pkgr>=0.1.0",
|
||||
"abx-spec-config>=0.1.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[project.entry-points.abx]
|
||||
abx_plugin_playwright_binprovider = "abx_plugin_playwright_binprovider"
|
0
packages/abx-plugin-pocket-extractor/README.md
Normal file
0
packages/abx-plugin-pocket-extractor/README.md
Normal file
7
packages/abx-plugin-pocket-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-pocket-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-pocket-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
0
packages/abx-plugin-puppeteer-binprovider/README.md
Normal file
0
packages/abx-plugin-puppeteer-binprovider/README.md
Normal file
|
@ -42,7 +42,8 @@ class PuppeteerBinProvider(BaseBinProvider):
|
|||
_browser_abspaths: ClassVar[Dict[str, HostBinPath]] = {}
|
||||
|
||||
def setup(self) -> None:
|
||||
# update paths from config
|
||||
# update paths from config; do this lazily (inside setup) because we don't want to import archivebox.config.common at import-time
|
||||
# we want to avoid depending on archivebox from abx code if at all possible
|
||||
from archivebox.config.common import STORAGE_CONFIG
|
||||
self.puppeteer_browsers_dir = STORAGE_CONFIG.LIB_DIR / 'browsers'
|
||||
self.PATH = str(STORAGE_CONFIG.LIB_DIR / 'bin')
|
7
packages/abx-plugin-puppeteer-binprovider/pyproject.toml
Normal file
7
packages/abx-plugin-puppeteer-binprovider/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-puppeteer-binprovider"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
0
packages/abx-plugin-readability-extractor/README.md
Normal file
0
packages/abx-plugin-readability-extractor/README.md
Normal file
7
packages/abx-plugin-readability-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-readability-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-readability-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
0
packages/abx-plugin-readwise-extractor/README.md
Normal file
0
packages/abx-plugin-readwise-extractor/README.md
Normal file
7
packages/abx-plugin-readwise-extractor/pyproject.toml
Normal file
7
packages/abx-plugin-readwise-extractor/pyproject.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[project]
|
||||
name = "abx-readwise-extractor"
|
||||
version = "0.1.0"
|
||||
description = "Add your description here"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
dependencies = []
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue