fix plugin loading order, admin, abx-pkg

This commit is contained in:
Nick Sweeting 2024-11-16 06:43:06 -08:00
parent 210fd935d7
commit c8e186f21b
No known key found for this signature in database
78 changed files with 801 additions and 987 deletions

View file

@ -5,12 +5,10 @@ __command__ = 'archivebox update'
import sys import sys
import argparse import argparse
from pathlib import Path
from typing import List, Optional, IO from typing import List, Optional, IO
from archivebox.misc.util import docstring from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR from archivebox.index import (
from ..index import (
LINK_FILTERS, LINK_FILTERS,
get_indexed_folders, get_indexed_folders,
get_archived_folders, get_archived_folders,
@ -23,8 +21,16 @@ from ..index import (
get_corrupted_folders, get_corrupted_folders,
get_unrecognized_folders, get_unrecognized_folders,
) )
from ..logging_util import SmartFormatter, accept_stdin from archivebox.logging_util import SmartFormatter, accept_stdin
from ..main import update # from ..main import update
def update():
from archivebox.config.django import setup_django
setup_django()
from actors.orchestrator import Orchestrator
orchestrator = Orchestrator()
orchestrator.start()
@docstring(update.__doc__) @docstring(update.__doc__)
@ -116,20 +122,22 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
if not command.filter_patterns: if not command.filter_patterns:
filter_patterns_str = accept_stdin(stdin) filter_patterns_str = accept_stdin(stdin)
update( update()
resume=command.resume,
only_new=command.only_new, # update(
index_only=command.index_only, # resume=command.resume,
overwrite=command.overwrite, # only_new=command.only_new,
filter_patterns_str=filter_patterns_str, # index_only=command.index_only,
filter_patterns=command.filter_patterns, # overwrite=command.overwrite,
filter_type=command.filter_type, # filter_patterns_str=filter_patterns_str,
status=command.status, # filter_patterns=command.filter_patterns,
after=command.after, # filter_type=command.filter_type,
before=command.before, # status=command.status,
out_dir=Path(pwd) if pwd else DATA_DIR, # after=command.after,
extractors=command.extract, # before=command.before,
) # out_dir=Path(pwd) if pwd else DATA_DIR,
# extractors=command.extract,
# )
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -1,4 +1,4 @@
__package__ = 'config' __package__ = 'archivebox.config'
__order__ = 200 __order__ = 200
from .paths import ( from .paths import (

View file

@ -120,6 +120,8 @@ class ArchivingConfig(BaseConfigSet):
SAVE_ALLOWLIST: Dict[str, List[str]] = Field(default={}) # mapping of regex patterns to list of archive methods SAVE_ALLOWLIST: Dict[str, List[str]] = Field(default={}) # mapping of regex patterns to list of archive methods
SAVE_DENYLIST: Dict[str, List[str]] = Field(default={}) SAVE_DENYLIST: Dict[str, List[str]] = Field(default={})
DEFAULT_PERSONA: str = Field(default='Default')
# GIT_DOMAINS: str = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht') # GIT_DOMAINS: str = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
# WGET_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}') # WGET_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
# CURL_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}') # CURL_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')

View file

@ -86,10 +86,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
} }
for plugin_id, plugin in abx.get_all_plugins().items(): for plugin_id, plugin in abx.get_all_plugins().items():
if not plugin.hooks.get('get_BINARIES'): plugin = benedict(plugin)
if not hasattr(plugin.plugin, 'get_BINARIES'):
continue continue
for binary in plugin.hooks.get_BINARIES().values(): for binary in plugin.plugin.get_BINARIES().values():
try: try:
installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary) installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
binary = installed_binary.load_from_db() binary = installed_binary.load_from_db()
@ -214,9 +215,9 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
return 'black' return 'black'
for plugin_id, plugin in abx.get_all_plugins().items(): for plugin_id, plugin in abx.get_all_plugins().items():
plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {}) plugin.hooks.get_BINPROVIDERS = getattr(plugin.plugin, 'get_BINPROVIDERS', lambda: {})
plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {}) plugin.hooks.get_BINARIES = getattr(plugin.plugin, 'get_BINARIES', lambda: {})
plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {}) plugin.hooks.get_CONFIG = getattr(plugin.plugin, 'get_CONFIG', lambda: {})
rows['Label'].append(ItemLink(plugin.label, key=plugin.package)) rows['Label'].append(ItemLink(plugin.label, key=plugin.package))
rows['Version'].append(str(plugin.version)) rows['Version'].append(str(plugin.version))
@ -251,8 +252,10 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
plugins = abx.get_all_plugins()
plugin_id = None plugin_id = None
for check_plugin_id, loaded_plugin in settings.PLUGINS.items(): for check_plugin_id, loaded_plugin in plugins.items():
if check_plugin_id.split('.')[-1] == key.split('.')[-1]: if check_plugin_id.split('.')[-1] == key.split('.')[-1]:
plugin_id = check_plugin_id plugin_id = check_plugin_id
break break

View file

@ -1,5 +1,5 @@
__package__ = 'archivebox.core' __package__ = 'archivebox.core'
__order__ = 100
import abx import abx
@abx.hookimpl @abx.hookimpl

View file

@ -21,7 +21,7 @@ class SnapshotActor(ActorType[Snapshot]):
FINAL_STATES: ClassVar[list[State]] = SnapshotMachine.final_states # ['sealed'] FINAL_STATES: ClassVar[list[State]] = SnapshotMachine.final_states # ['sealed']
STATE_FIELD_NAME: ClassVar[str] = Snapshot.state_field_name # status STATE_FIELD_NAME: ClassVar[str] = Snapshot.state_field_name # status
MAX_CONCURRENT_ACTORS: ClassVar[int] = 3 MAX_CONCURRENT_ACTORS: ClassVar[int] = 1 # 3
MAX_TICK_TIME: ClassVar[int] = 10 MAX_TICK_TIME: ClassVar[int] = 10
CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10 CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
@ -39,7 +39,7 @@ class ArchiveResultActor(ActorType[ArchiveResult]):
FINAL_STATES: ClassVar[list[State]] = ArchiveResultMachine.final_states # ['succeeded', 'failed', 'skipped'] FINAL_STATES: ClassVar[list[State]] = ArchiveResultMachine.final_states # ['succeeded', 'failed', 'skipped']
STATE_FIELD_NAME: ClassVar[str] = ArchiveResult.state_field_name # status STATE_FIELD_NAME: ClassVar[str] = ArchiveResult.state_field_name # status
MAX_CONCURRENT_ACTORS: ClassVar[int] = 6 MAX_CONCURRENT_ACTORS: ClassVar[int] = 1 # 6
MAX_TICK_TIME: ClassVar[int] = 60 MAX_TICK_TIME: ClassVar[int] = 60
CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10 CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10

View file

@ -39,7 +39,7 @@ class ArchiveResultInline(admin.TabularInline):
extra = 0 extra = 0
sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version') sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
readonly_fields = ('id', 'result_id', 'completed', 'command', 'version') readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output') fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'retry_at', 'output')
# exclude = ('id',) # exclude = ('id',)
ordering = ('end_ts',) ordering = ('end_ts',)
show_change_link = True show_change_link = True
@ -105,11 +105,11 @@ class ArchiveResultInline(admin.TabularInline):
class ArchiveResultAdmin(ABIDModelAdmin): class ArchiveResultAdmin(ABIDModelAdmin):
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str') list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status') sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary') readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp') search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields) fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
autocomplete_fields = ['snapshot'] autocomplete_fields = ['snapshot']
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version') list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
@ -169,7 +169,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
result.output, result.output,
) )
output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp)) output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
path_from_output_str = (snapshot_dir / result.output) path_from_output_str = (snapshot_dir / (result.output or ''))
output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output)) output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
if os.access(path_from_output_str, os.R_OK): if os.access(path_from_output_str, os.R_OK):
root_dir = str(path_from_output_str) root_dir = str(path_from_output_str)

View file

@ -56,12 +56,12 @@ class SnapshotActionForm(ActionForm):
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin): class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
list_display = ('created_at', 'title_str', 'files', 'size', 'url_str', 'crawl') list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'created_at', 'crawl') sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir') readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name') search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name') list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'crawl', *readonly_fields) fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', *readonly_fields)
ordering = ['-created_at'] ordering = ['-created_at']
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots'] actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
inlines = [TagInline, ArchiveResultInline] inlines = [TagInline, ArchiveResultInline]

View file

@ -1,7 +1,7 @@
__package__ = 'archivebox.core' __package__ = 'archivebox.core'
from typing import Optional, Dict, Iterable from typing import Optional, Dict, Iterable, Any
from django_stubs_ext.db.models import TypedModelMeta from django_stubs_ext.db.models import TypedModelMeta
import os import os
@ -20,20 +20,22 @@ from django.db.models import Case, When, Value, IntegerField
from django.contrib import admin from django.contrib import admin
from django.conf import settings from django.conf import settings
from actors.models import ModelWithStateMachine
import abx
from archivebox.config import CONSTANTS from archivebox.config import CONSTANTS
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
from actors.models import ModelWithStateMachine
from queues.tasks import bg_archive_snapshot from queues.tasks import bg_archive_snapshot
from crawls.models import Crawl from crawls.models import Crawl
# from machine.models import Machine, NetworkInterface # from machine.models import Machine, NetworkInterface
from archivebox.misc.system import get_dir_size from archivebox.misc.system import get_dir_size
from archivebox.misc.util import parse_date, base_url from archivebox.misc.util import parse_date, base_url
from ..index.schema import Link from archivebox.index.schema import Link
from ..index.html import snapshot_icons from archivebox.index.html import snapshot_icons
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS from archivebox.extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE
# class BaseModel(models.Model): # class BaseModel(models.Model):
@ -195,13 +197,21 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag')) tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
title = models.CharField(max_length=512, null=True, blank=True, db_index=True) title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at') # config = models.JSONField(default=dict, null=False, blank=False, editable=True)
keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at', 'created_at', 'status', 'retry_at', 'abid', 'id')
archiveresult_set: models.Manager['ArchiveResult'] archiveresult_set: models.Manager['ArchiveResult']
objects = SnapshotManager() objects = SnapshotManager()
def save(self, *args, **kwargs): def save(self, *args, **kwargs):
if self.pk:
existing_snapshot = self.__class__.objects.filter(pk=self.pk).first()
if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED:
if self.as_json() != existing_snapshot.as_json():
raise Exception(f'Snapshot {self.pk} is already sealed, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_snapshot.as_json()}')
if not self.bookmarked_at: if not self.bookmarked_at:
self.bookmarked_at = self.created_at or self._init_timestamp self.bookmarked_at = self.created_at or self._init_timestamp
@ -427,7 +437,7 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
ALL_EXTRACTORS = ['favicon', 'title', 'screenshot', 'headers', 'singlefile', 'dom', 'git', 'archive_org', 'readability', 'mercury', 'pdf', 'wget'] ALL_EXTRACTORS = ['favicon', 'title', 'screenshot', 'headers', 'singlefile', 'dom', 'git', 'archive_org', 'readability', 'mercury', 'pdf', 'wget']
# config = get_scope_config(snapshot=self) # config = get_scope_config(snapshot=self)
config = {'EXTRACTORS': ''} config = {'EXTRACTORS': ','.join(ALL_EXTRACTORS)}
if config.get('EXTRACTORS', 'auto') == 'auto': if config.get('EXTRACTORS', 'auto') == 'auto':
EXTRACTORS = ALL_EXTRACTORS EXTRACTORS = ALL_EXTRACTORS
@ -438,10 +448,13 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
for extractor in EXTRACTORS: for extractor in EXTRACTORS:
if not extractor: if not extractor:
continue continue
archiveresult, _created = ArchiveResult.objects.get_or_create( archiveresult = ArchiveResult.objects.update_or_create(
snapshot=self, snapshot=self,
extractor=extractor, extractor=extractor,
status=ArchiveResult.INITIAL_STATE, status=ArchiveResult.INITIAL_STATE,
defaults={
'retry_at': timezone.now(),
},
) )
archiveresults.append(archiveresult) archiveresults.append(archiveresult)
return archiveresults return archiveresults
@ -561,6 +574,8 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
objects = ArchiveResultManager() objects = ArchiveResultManager()
keys = ('snapshot_id', 'extractor', 'cmd', 'pwd', 'cmd_version', 'output', 'start_ts', 'end_ts', 'created_at', 'status', 'retry_at', 'abid', 'id')
class Meta(TypedModelMeta): class Meta(TypedModelMeta):
verbose_name = 'Archive Result' verbose_name = 'Archive Result'
verbose_name_plural = 'Archive Results Log' verbose_name_plural = 'Archive Results Log'
@ -577,6 +592,16 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
def __str__(self): def __str__(self):
return repr(self) return repr(self)
def save(self, *args, **kwargs):
# if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]):
# raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.')
if self.pk:
existing_archiveresult = self.__class__.objects.filter(pk=self.pk).first()
if existing_archiveresult and existing_archiveresult.status in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]:
if self.as_json() != existing_archiveresult.as_json():
raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_archiveresult.as_json()}')
super().save(*args, **kwargs)
# TODO: finish connecting machine.models # TODO: finish connecting machine.models
# @cached_property # @cached_property
# def machine(self): # def machine(self):
@ -603,36 +628,53 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
return f'/{self.snapshot.archive_path}/{self.output_path()}' return f'/{self.snapshot.archive_path}/{self.output_path()}'
@property @property
def extractor_module(self): def extractor_module(self) -> Any | None:
return EXTRACTORS[self.extractor] return abx.as_dict(abx.pm.hook.get_EXTRACTORS()).get(self.extractor, None)
def output_path(self) -> str: def output_path(self) -> str | None:
"""return the canonical output filename or directory name within the snapshot dir""" """return the canonical output filename or directory name within the snapshot dir"""
return self.extractor_module.get_output_path() try:
return self.extractor_module.get_output_path(self.snapshot)
except Exception as e:
print(f'Error getting output path for {self.extractor} extractor: {e}')
return None
def embed_path(self) -> str: def embed_path(self) -> str | None:
""" """
return the actual runtime-calculated path to the file on-disk that return the actual runtime-calculated path to the file on-disk that
should be used for user-facing iframe embeds of this result should be used for user-facing iframe embeds of this result
""" """
if get_embed_path_func := getattr(self.extractor_module, 'get_embed_path', None): try:
return get_embed_path_func(self) return self.extractor_module.get_embed_path(self)
except Exception as e:
return self.extractor_module.get_output_path() print(f'Error getting embed path for {self.extractor} extractor: {e}')
return None
def legacy_output_path(self): def legacy_output_path(self):
link = self.snapshot.as_link() link = self.snapshot.as_link()
return link.canonical_outputs().get(f'{self.extractor}_path') return link.canonical_outputs().get(f'{self.extractor}_path')
def output_exists(self) -> bool: def output_exists(self) -> bool:
return os.path.exists(self.output_path()) output_path = self.output_path()
return bool(output_path and os.path.exists(output_path))
def create_output_dir(self): def create_output_dir(self):
snap_dir = self.snapshot_dir snap_dir = Path(self.snapshot_dir)
snap_dir.mkdir(parents=True, exist_ok=True) snap_dir.mkdir(parents=True, exist_ok=True)
return snap_dir / self.output_path() output_path = self.output_path()
if output_path:
(snap_dir / output_path).mkdir(parents=True, exist_ok=True)
else:
raise ValueError(f'Not able to calculate output path for {self.extractor} extractor in {snap_dir}')
return snap_dir / output_path
def as_json(self, *args) -> dict:
args = args or self.keys
return {
key: getattr(self, key)
for key in args
}
# def get_storage_dir(self, create=True, symlink=True): # def get_storage_dir(self, create=True, symlink=True):
# date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d') # date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')

View file

@ -37,25 +37,44 @@ class SnapshotMachine(StateMachine, strict_states=True):
super().__init__(snapshot, *args, **kwargs) super().__init__(snapshot, *args, **kwargs)
def can_start(self) -> bool: def can_start(self) -> bool:
return self.snapshot.url can_start = bool(self.snapshot.url and (self.snapshot.retry_at < timezone.now()))
if not can_start:
print(f'SnapshotMachine[{self.snapshot.ABID}].can_start() False: {self.snapshot.url} {self.snapshot.retry_at} {timezone.now()}')
return can_start
def is_finished(self) -> bool: def is_finished(self) -> bool:
# if no archiveresults exist yet, it's not finished
if not self.snapshot.archiveresult_set.exists(): if not self.snapshot.archiveresult_set.exists():
return False return False
# if archiveresults exist but are still pending, it's not finished
if self.snapshot.pending_archiveresults().exists(): if self.snapshot.pending_archiveresults().exists():
return False return False
# otherwise archiveresults exist and are all finished, so it's finished
return True return True
@started.enter def on_transition(self, event, state):
def on_started(self): print(f'SnapshotMachine[{self.snapshot.ABID}].on_transition() {event} -> {state}')
print(f'SnapshotMachine[{self.snapshot.ABID}].on_started(): snapshot.create_pending_archiveresults() + snapshot.bump_retry_at(+60s)')
self.snapshot.create_pending_archiveresults() @queued.enter
self.snapshot.bump_retry_at(seconds=60) def enter_queued(self):
print(f'SnapshotMachine[{self.snapshot.ABID}].on_queued(): snapshot.retry_at = now()')
self.snapshot.status = Snapshot.StatusChoices.QUEUED
self.snapshot.retry_at = timezone.now()
self.snapshot.save() self.snapshot.save()
@started.enter
def enter_started(self):
print(f'SnapshotMachine[{self.snapshot.ABID}].on_started(): snapshot.create_pending_archiveresults() + snapshot.bump_retry_at(+60s)')
self.snapshot.status = Snapshot.StatusChoices.STARTED
self.snapshot.bump_retry_at(seconds=60)
self.snapshot.save()
self.snapshot.create_pending_archiveresults()
@sealed.enter @sealed.enter
def on_sealed(self): def enter_sealed(self):
print(f'SnapshotMachine[{self.snapshot.ABID}].on_sealed(): snapshot.retry_at=None') print(f'SnapshotMachine[{self.snapshot.ABID}].on_sealed(): snapshot.retry_at=None')
self.snapshot.status = Snapshot.StatusChoices.SEALED
self.snapshot.retry_at = None self.snapshot.retry_at = None
self.snapshot.save() self.snapshot.save()
@ -95,7 +114,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
super().__init__(archiveresult, *args, **kwargs) super().__init__(archiveresult, *args, **kwargs)
def can_start(self) -> bool: def can_start(self) -> bool:
return self.archiveresult.snapshot and self.archiveresult.snapshot.STATE == Snapshot.active_state return self.archiveresult.snapshot and (self.archiveresult.retry_at < timezone.now())
def is_succeeded(self) -> bool: def is_succeeded(self) -> bool:
return self.archiveresult.output_exists() return self.archiveresult.output_exists()
@ -109,29 +128,45 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
def is_finished(self) -> bool: def is_finished(self) -> bool:
return self.is_failed() or self.is_succeeded() return self.is_failed() or self.is_succeeded()
@started.enter
def on_started(self): @queued.enter
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_started(): archiveresult.start_ts + create_output_dir() + bump_retry_at(+60s)') def enter_queued(self):
self.archiveresult.start_ts = timezone.now() print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_queued(): archiveresult.retry_at = now()')
self.archiveresult.create_output_dir() self.archiveresult.status = ArchiveResult.StatusChoices.QUEUED
self.archiveresult.bump_retry_at(seconds=60) self.archiveresult.retry_at = timezone.now()
self.archiveresult.save() self.archiveresult.save()
@backoff.enter @started.enter
def on_backoff(self): def enter_started(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_backoff(): archiveresult.bump_retry_at(+60s)') print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_started(): archiveresult.start_ts + create_output_dir() + bump_retry_at(+60s)')
self.archiveresult.status = ArchiveResult.StatusChoices.STARTED
self.archiveresult.start_ts = timezone.now()
self.archiveresult.bump_retry_at(seconds=60) self.archiveresult.bump_retry_at(seconds=60)
self.archiveresult.save() self.archiveresult.save()
self.archiveresult.create_output_dir()
@backoff.enter
def enter_backoff(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_backoff(): archiveresult.retries += 1, archiveresult.bump_retry_at(+60s), archiveresult.end_ts = None')
self.archiveresult.status = ArchiveResult.StatusChoices.BACKOFF
self.archiveresult.retries = getattr(self.archiveresult, 'retries', 0) + 1
self.archiveresult.bump_retry_at(seconds=60)
self.archiveresult.end_ts = None
self.archiveresult.save()
@succeeded.enter @succeeded.enter
def on_succeeded(self): def enter_succeeded(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_succeeded(): archiveresult.end_ts') print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_succeeded(): archiveresult.retry_at = None, archiveresult.end_ts = now()')
self.archiveresult.status = ArchiveResult.StatusChoices.SUCCEEDED
self.archiveresult.retry_at = None
self.archiveresult.end_ts = timezone.now() self.archiveresult.end_ts = timezone.now()
self.archiveresult.save() self.archiveresult.save()
@failed.enter @failed.enter
def on_failed(self): def enter_failed(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_failed(): archiveresult.end_ts') print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_failed(): archivebox.retry_at = None, archiveresult.end_ts = now()')
self.archiveresult.status = ArchiveResult.StatusChoices.FAILED
self.archiveresult.retry_at = None
self.archiveresult.end_ts = timezone.now() self.archiveresult.end_ts = timezone.now()
self.archiveresult.save() self.archiveresult.save()

View file

@ -102,7 +102,8 @@ class SnapshotView(View):
# iterate through all the files in the snapshot dir and add the biggest ones to the result list # iterate through all the files in the snapshot dir and add the biggest ones to the result list
snap_dir = Path(snapshot.link_dir) snap_dir = Path(snapshot.link_dir)
assert os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK) if not os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK):
return {}
for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')): for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
extension = result_file.suffix.lstrip('.').lower() extension = result_file.suffix.lstrip('.').lower()
@ -504,7 +505,7 @@ def find_config_section(key: str) -> str:
if key in CONSTANTS_CONFIG: if key in CONSTANTS_CONFIG:
return 'CONSTANT' return 'CONSTANT'
matching_sections = [ matching_sections = [
section_id for section_id, section in CONFIGS.items() if key in section.model_fields section_id for section_id, section in CONFIGS.items() if key in dict(section)
] ]
section = matching_sections[0] if matching_sections else 'DYNAMIC' section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section return section
@ -518,8 +519,9 @@ def find_config_default(key: str) -> str:
default_val = None default_val = None
for config in CONFIGS.values(): for config in CONFIGS.values():
if key in config.model_fields: if key in dict(config):
default_val = config.model_fields[key].default default_field = getattr(config, 'model_fields', dict(config))[key]
default_val = default_field.default if hasattr(default_field, 'default') else default_field
break break
if isinstance(default_val, Callable): if isinstance(default_val, Callable):
@ -529,7 +531,6 @@ def find_config_default(key: str) -> str:
else: else:
default_val = str(default_val) default_val = str(default_val)
return default_val return default_val
def find_config_type(key: str) -> str: def find_config_type(key: str) -> str:
@ -567,7 +568,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
} }
for section_id, section in reversed(list(CONFIGS.items())): for section_id, section in reversed(list(CONFIGS.items())):
for key, field in section.model_fields.items(): for key in dict(section).keys():
rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '') rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key)) rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(format_html('<code>{}</code>', find_config_type(key))) rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
@ -580,7 +581,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
for key in CONSTANTS_CONFIG.keys(): for key in CONSTANTS_CONFIG.keys():
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '') rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key)) rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', repr(CONSTANTS_CONFIG[key])))) rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', str(CONSTANTS_CONFIG[key]))))
rows['Value'].append(format_html('<code>{}</code>', CONSTANTS_CONFIG[key]) if key_is_safe(key) else '******** (redacted)') rows['Value'].append(format_html('<code>{}</code>', CONSTANTS_CONFIG[key]) if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>')) rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>')) # rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
@ -642,13 +643,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
<code>{find_config_default(key) or '↗️ See in ArchiveBox source code...'}</code> <code>{find_config_default(key) or '↗️ See in ArchiveBox source code...'}</code>
</a> </a>
<br/><br/> <br/><br/>
<p style="display: {"block" if key in FLAT_CONFIG else "none"}"> <p style="display: {"block" if key in FLAT_CONFIG and key not in CONSTANTS_CONFIG else "none"}">
<i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i> <i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
<br/><br/> <br/><br/>
<code>archivebox config --set {key}="{ <code>archivebox config --set {key}="{
val.strip("'") val.strip("'")
if (val := find_config_default(key)) else if (val := find_config_default(key)) else
(repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'") (str(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
}"</code> }"</code>
</p> </p>
'''), '''),

View file

@ -1,4 +1,5 @@
__package__ = 'archivebox.crawls' __package__ = 'archivebox.crawls'
__order__ = 100
import abx import abx

View file

@ -18,6 +18,6 @@ class CrawlActor(ActorType[Crawl]):
FINAL_STATES: ClassVar[list[State]] = CrawlMachine.final_states FINAL_STATES: ClassVar[list[State]] = CrawlMachine.final_states
STATE_FIELD_NAME: ClassVar[str] = Crawl.state_field_name STATE_FIELD_NAME: ClassVar[str] = Crawl.state_field_name
MAX_CONCURRENT_ACTORS: ClassVar[int] = 3 MAX_CONCURRENT_ACTORS: ClassVar[int] = 1
MAX_TICK_TIME: ClassVar[int] = 10 MAX_TICK_TIME: ClassVar[int] = 10
CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10 CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10

View file

@ -150,8 +150,8 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
parser = (self.seed and self.seed.extractor) or 'auto' parser = (self.seed and self.seed.extractor) or 'auto'
created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>' created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
if self.id and self.seed: if self.id and self.seed:
return f'[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})' return f'\\[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
return f'[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})' return f'\\[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
@classmethod @classmethod
def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None): def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None):
@ -184,26 +184,27 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl' return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
def pending_snapshots(self) -> QuerySet['Snapshot']: def pending_snapshots(self) -> QuerySet['Snapshot']:
from core.models import Snapshot return self.snapshot_set.filter(retry_at__isnull=False)
return self.snapshot_set.exclude(status__in=Snapshot.FINAL_OR_ACTIVE_STATES)
def pending_archiveresults(self) -> QuerySet['ArchiveResult']: def pending_archiveresults(self) -> QuerySet['ArchiveResult']:
from core.models import ArchiveResult from core.models import ArchiveResult
snapshot_ids = self.snapshot_set.values_list('id', flat=True) snapshot_ids = self.snapshot_set.values_list('id', flat=True)
pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids).exclude(status__in=ArchiveResult.FINAL_OR_ACTIVE_STATES) pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, retry_at__isnull=True)
return pending_archiveresults return pending_archiveresults
def create_root_snapshot(self) -> 'Snapshot': def create_root_snapshot(self) -> 'Snapshot':
from core.models import Snapshot from core.models import Snapshot
root_snapshot, _ = Snapshot.objects.get_or_create( root_snapshot, _ = Snapshot.objects.update_or_create(
crawl=self,
url=self.seed.uri, url=self.seed.uri,
status=Snapshot.INITIAL_STATE, defaults={
retry_at=timezone.now(), 'crawl': self,
timestamp=str(timezone.now().timestamp()), 'status': Snapshot.INITIAL_STATE,
# config=self.seed.config, 'retry_at': timezone.now(),
'timestamp': str(timezone.now().timestamp()),
# 'config': self.seed.config,
},
) )
return root_snapshot return root_snapshot

View file

@ -1,5 +1,7 @@
__package__ = 'archivebox.crawls' __package__ = 'archivebox.crawls'
from django.utils import timezone
from statemachine import State, StateMachine from statemachine import State, StateMachine
from crawls.models import Crawl from crawls.models import Crawl
@ -31,7 +33,7 @@ class CrawlMachine(StateMachine, strict_states=True):
super().__init__(crawl, *args, **kwargs) super().__init__(crawl, *args, **kwargs)
def can_start(self) -> bool: def can_start(self) -> bool:
return self.crawl.seed and self.crawl.seed.uri return bool(self.crawl.seed and self.crawl.seed.uri and (self.retry_at < timezone.now()))
def is_finished(self) -> bool: def is_finished(self) -> bool:
if not self.crawl.snapshot_set.exists(): if not self.crawl.snapshot_set.exists():
@ -47,15 +49,17 @@ class CrawlMachine(StateMachine, strict_states=True):
# return "before_transition_return" # return "before_transition_return"
@started.enter @started.enter
def on_started(self): def enter_started(self):
print(f'CrawlMachine[{self.crawl.ABID}].on_started(): crawl.create_root_snapshot() + crawl.bump_retry_at(+10s)') print(f'CrawlMachine[{self.crawl.ABID}].on_started(): crawl.create_root_snapshot() + crawl.bump_retry_at(+10s)')
self.crawl.create_root_snapshot() self.crawl.status = Crawl.StatusChoices.STARTED
self.crawl.bump_retry_at(seconds=10) self.crawl.bump_retry_at(seconds=10)
self.crawl.save() self.crawl.save()
self.crawl.create_root_snapshot()
@sealed.enter @sealed.enter
def on_sealed(self): def enter_sealed(self):
print(f'CrawlMachine[{self.crawl.ABID}].on_sealed(): crawl.retry_at=None') print(f'CrawlMachine[{self.crawl.ABID}].on_sealed(): crawl.retry_at=None')
self.crawl.status = Crawl.StatusChoices.SEALED
self.crawl.retry_at = None self.crawl.retry_at = None
self.crawl.save() self.crawl.save()

View file

@ -11,7 +11,7 @@ from django.utils.functional import cached_property
import abx import abx
import archivebox import archivebox
from pydantic_pkgr import Binary, BinProvider from abx_pkg import Binary, BinProvider
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
@ -323,7 +323,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
# whereas a loaded binary is a not-yet saved instance that may not have the same config # whereas a loaded binary is a not-yet saved instance that may not have the same config
# why would we want to load a binary record from the db when it could be freshly loaded? # why would we want to load a binary record from the db when it could be freshly loaded?
def load_from_db(self) -> Binary: def load_from_db(self) -> Binary:
# TODO: implement defaults arg in pydantic_pkgr # TODO: implement defaults arg in abx_pkg
# return self.BINARY.load(defaults={ # return self.BINARY.load(defaults={
# 'binprovider': self.BINPROVIDER, # 'binprovider': self.BINPROVIDER,
# 'abspath': Path(self.abspath), # 'abspath': Path(self.abspath),

View file

@ -14,7 +14,7 @@ from crontab import CronTab, CronSlices
from django.db.models import QuerySet from django.db.models import QuerySet
from django.utils import timezone from django.utils import timezone
from pydantic_pkgr import Binary from abx_pkg import Binary
import abx import abx
import archivebox import archivebox

View file

@ -6,7 +6,7 @@ PKGS_DIR = Path(__file__).parent
VENDORED_PKGS = [ VENDORED_PKGS = [
'abx', 'abx',
# 'pydantic-pkgr', # 'abx-pkg',
# ... everything else in archivebox/pkgs/* comes after ... # ... everything else in archivebox/pkgs/* comes after ...
] ]

View file

@ -1,3 +1,4 @@
__package__ = 'abx_plugin_chrome'
__label__ = 'Chrome' __label__ = 'Chrome'
__author__ = 'ArchiveBox' __author__ = 'ArchiveBox'
@ -25,10 +26,11 @@ def ready():
CHROME_CONFIG.validate() CHROME_CONFIG.validate()
# @abx.hookimpl @abx.hookimpl
# def get_EXTRACTORS(): def get_EXTRACTORS():
# return { from .extractors import PDF_EXTRACTOR, SCREENSHOT_EXTRACTOR, DOM_EXTRACTOR
# 'pdf': PDF_EXTRACTOR, return {
# 'screenshot': SCREENSHOT_EXTRACTOR, 'pdf': PDF_EXTRACTOR,
# 'dom': DOM_EXTRACTOR, 'screenshot': SCREENSHOT_EXTRACTOR,
# } 'dom': DOM_EXTRACTOR,
}

View file

@ -4,7 +4,7 @@ from pathlib import Path
from typing import List, Optional from typing import List, Optional
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import ( from abx_pkg import (
Binary, Binary,
BinProvider, BinProvider,
BinName, BinName,

View file

@ -3,7 +3,7 @@ from pathlib import Path
from typing import List, Optional from typing import List, Optional
from pydantic import Field from pydantic import Field
from pydantic_pkgr import bin_abspath from abx_pkg import bin_abspath
from abx_spec_config.base_configset import BaseConfigSet from abx_spec_config.base_configset import BaseConfigSet
from abx_plugin_default_binproviders import env from abx_plugin_default_binproviders import env

View file

@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
] ]
[build-system] [build-system]

View file

@ -3,7 +3,7 @@ __package__ = 'abx_plugin_curl'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary from abx_pkg import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env from abx_plugin_default_binproviders import apt, brew, env

View file

@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
] ]
[build-system] [build-system]

View file

@ -3,7 +3,7 @@ import abx
from typing import Dict from typing import Dict
from pydantic_pkgr import ( from abx_pkg import (
AptProvider, AptProvider,
BrewProvider, BrewProvider,
EnvProvider, EnvProvider,

View file

@ -6,8 +6,8 @@ readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
] ]
[build-system] [build-system]

View file

@ -20,10 +20,10 @@ def get_CONFIG():
} }
# @abx.hookimpl @abx.hookimpl
# def get_EXTRACTORS(): def get_EXTRACTORS():
# from .extractors import FAVICON_EXTRACTOR from .extractors import FAVICON_EXTRACTOR
# return { return {
# 'favicon': FAVICON_EXTRACTOR, 'favicon': FAVICON_EXTRACTOR,
# } }

View file

@ -3,7 +3,7 @@ __package__ = 'abx_plugin_git'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary from abx_pkg import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env from abx_plugin_default_binproviders import apt, brew, env

View file

@ -1,15 +1,20 @@
__package__ = 'abx_plugin_git' __package__ = 'abx_plugin_git'
# from pathlib import Path from pathlib import Path
# from .binaries import GIT_BINARY
# class GitExtractor(BaseExtractor): from abx_pkg import BinName
# name: ExtractorName = 'git'
# binary: str = GIT_BINARY.name
# def get_output_path(self, snapshot) -> Path | None: from abx_spec_extractor import BaseExtractor, ExtractorName
# return snapshot.as_link() / 'git'
# GIT_EXTRACTOR = GitExtractor() from .binaries import GIT_BINARY
class GitExtractor(BaseExtractor):
name: ExtractorName = 'git'
binary: BinName = GIT_BINARY.name
def get_output_path(self, snapshot) -> Path | None:
return snapshot.as_link() / 'git'
GIT_EXTRACTOR = GitExtractor()

View file

@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24", "abx-plugin-default-binproviders>=2024.10.24",
] ]

View file

@ -6,7 +6,7 @@ from typing import List
from pathlib import Path from pathlib import Path
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinaryOverrides, SemVer, Binary, BinProvider from abx_pkg import BinaryOverrides, SemVer, Binary, BinProvider
from abx_plugin_default_binproviders import apt from abx_plugin_default_binproviders import apt
from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES

View file

@ -3,7 +3,7 @@ __package__ = 'abx_plugin_mercury'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary from abx_pkg import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
from abx_plugin_default_binproviders import env from abx_plugin_default_binproviders import env

View file

@ -1,17 +1,20 @@
__package__ = 'abx_plugin_mercury' __package__ = 'abx_plugin_mercury'
# from pathlib import Path from pathlib import Path
# from .binaries import MERCURY_BINARY from abx_pkg import BinName
from abx_spec_extractor import BaseExtractor, ExtractorName
from .binaries import MERCURY_BINARY
# class MercuryExtractor(BaseExtractor): class MercuryExtractor(BaseExtractor):
# name: ExtractorName = 'mercury' name: ExtractorName = 'mercury'
# binary: str = MERCURY_BINARY.name binary: BinName = MERCURY_BINARY.name
# def get_output_path(self, snapshot) -> Path | None: def get_output_path(self, snapshot) -> Path | None:
# return snapshot.link_dir / 'mercury' / 'content.html' return snapshot.link_dir / 'mercury' / 'content.html'
# MERCURY_EXTRACTOR = MercuryExtractor() MERCURY_EXTRACTOR = MercuryExtractor()

View file

@ -1,4 +1,4 @@
__package__ = 'plugins_pkg.npm' __package__ = 'abx_plugin_npm'
from typing import List from typing import List
@ -6,7 +6,7 @@ from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from benedict import benedict from benedict import benedict
from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides from abx_pkg import BinProvider, Binary, BinName, BinaryOverrides
from abx_plugin_default_binproviders import get_BINPROVIDERS from abx_plugin_default_binproviders import get_BINPROVIDERS

View file

@ -2,7 +2,7 @@ import os
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName from abx_pkg import NpmProvider, PATHStr, BinProviderName
import abx import abx

View file

@ -6,8 +6,8 @@ readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24", "abx-plugin-default-binproviders>=2024.10.24",
] ]

View file

@ -1,5 +1,6 @@
__package__ = 'abx_plugin_pip' __package__ = 'abx_plugin_pip'
__label__ = 'PIP' __label__ = 'PIP'
__order__ = 200
import abx import abx

View file

@ -9,7 +9,7 @@ from pydantic import InstanceOf, Field, model_validator
import django import django
import django.db.backends.sqlite3.base import django.db.backends.sqlite3.base
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type] from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer from abx_pkg import BinProvider, Binary, BinName, BinaryOverrides, SemVer
from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew

View file

@ -6,7 +6,7 @@ from typing import Optional
from benedict import benedict from benedict import benedict
from pydantic_pkgr import PipProvider, BinName, BinProviderName from abx_pkg import PipProvider, BinName, BinProviderName
import abx import abx

View file

@ -6,9 +6,9 @@ readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.5.4",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24", "abx-plugin-default-binproviders>=2024.10.24",
"django>=5.0.0", "django>=5.0.0",
] ]

View file

@ -3,7 +3,7 @@ __package__ = 'abx_plugin_playwright'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinName, BinProvider, Binary from abx_pkg import BinName, BinProvider, Binary
from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER

View file

@ -7,7 +7,7 @@ from pathlib import Path
from typing import List, Optional, Dict, ClassVar from typing import List, Optional, Dict, ClassVar
from pydantic import Field from pydantic import Field
from pydantic_pkgr import ( from abx_pkg import (
BinName, BinName,
BinProvider, BinProvider,
BinProviderName, BinProviderName,

View file

@ -7,8 +7,8 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"pydantic>=2.4.2", "pydantic>=2.4.2",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
] ]

View file

@ -3,7 +3,7 @@ __package__ = 'abx_plugin_puppeteer'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary from abx_pkg import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import env from abx_plugin_default_binproviders import env

View file

@ -4,7 +4,7 @@ from pathlib import Path
from typing import List, Optional, Dict, ClassVar from typing import List, Optional, Dict, ClassVar
from pydantic import Field from pydantic import Field
from pydantic_pkgr import ( from abx_pkg import (
BinProvider, BinProvider,
BinName, BinName,
BinProviderName, BinProviderName,

View file

@ -7,8 +7,8 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.5.4",
] ]
[build-system] [build-system]

View file

@ -3,7 +3,7 @@ __package__ = 'abx_plugin_readability'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName from abx_pkg import Binary, BinProvider, BinaryOverrides, BinName
from abx_plugin_default_binproviders import env from abx_plugin_default_binproviders import env
from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER

View file

@ -1,19 +1,19 @@
# __package__ = 'abx_plugin_readability' # __package__ = 'abx_plugin_readability'
# from pathlib import Path from pathlib import Path
# from pydantic_pkgr import BinName from abx_pkg import BinName
from abx_spec_extractor import BaseExtractor, ExtractorName
from .binaries import READABILITY_BINARY
# from .binaries import READABILITY_BINARY class ReadabilityExtractor(BaseExtractor):
name: ExtractorName = 'readability'
binary: BinName = READABILITY_BINARY.name
def get_output_path(self, snapshot) -> Path:
return Path(snapshot.link_dir) / 'readability' / 'content.html'
# class ReadabilityExtractor(BaseExtractor): READABILITY_EXTRACTOR = ReadabilityExtractor()
# name: str = 'readability'
# binary: BinName = READABILITY_BINARY.name
# def get_output_path(self, snapshot) -> Path:
# return Path(snapshot.link_dir) / 'readability' / 'content.html'
# READABILITY_EXTRACTOR = ReadabilityExtractor()

View file

@ -3,7 +3,7 @@ __id__ = 'abx_plugin_readwise_extractor'
__label__ = 'Readwise API' __label__ = 'Readwise API'
__version__ = '2024.10.27' __version__ = '2024.10.27'
__author__ = 'ArchiveBox' __author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise' __homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/pkgs/abx-plugin-readwise-extractor'
__dependencies__ = [] __dependencies__ = []
import abx import abx

View file

@ -3,7 +3,7 @@ __package__ = 'abx_plugin_ripgrep_search'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary from abx_pkg import BinProvider, BinaryOverrides, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env from abx_plugin_default_binproviders import apt, brew, env

View file

@ -1,7 +1,7 @@
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath from abx_pkg import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
from abx_plugin_default_binproviders import env from abx_plugin_default_binproviders import env
from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER

View file

@ -1,18 +1,21 @@
__package__ = 'abx_plugin_singlefile' __package__ = 'abx_plugin_singlefile'
# from pathlib import Path
# from pydantic_pkgr import BinName from pathlib import Path
# from .binaries import SINGLEFILE_BINARY from abx_pkg import BinName
from abx_spec_extractor import BaseExtractor, ExtractorName
from .binaries import SINGLEFILE_BINARY
# class SinglefileExtractor(BaseExtractor): class SinglefileExtractor(BaseExtractor):
# name: str = 'singlefile' name: ExtractorName = 'singlefile'
# binary: BinName = SINGLEFILE_BINARY.name binary: BinName = SINGLEFILE_BINARY.name
# def get_output_path(self, snapshot) -> Path: def get_output_path(self, snapshot) -> Path:
# return Path(snapshot.link_dir) / 'singlefile.html' return Path(snapshot.link_dir) / 'singlefile.html'
# SINGLEFILE_EXTRACTOR = SinglefileExtractor() SINGLEFILE_EXTRACTOR = SinglefileExtractor()

View file

@ -7,8 +7,8 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.5.4",
] ]
[build-system] [build-system]

View file

@ -3,7 +3,7 @@ __package__ = 'abx_plugin_sonic_search'
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary from abx_pkg import BinProvider, BinaryOverrides, BinName, Binary
from abx_plugin_default_binproviders import brew, env from abx_plugin_default_binproviders import brew, env

View file

@ -1,4 +1,4 @@
__package__ = 'plugins_search.sonic' __package__ = 'abx_plugin_sonic_search'
from typing import List, Generator, cast from typing import List, Generator, cast

View file

@ -7,9 +7,9 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
"abx-spec-searchbackend>=0.1.0", "abx-spec-searchbackend>=0.1.0",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.5.4",
] ]
[build-system] [build-system]

View file

@ -7,3 +7,11 @@ import abx
# return { # return {
# 'title_extractor': TITLE_EXTRACTOR_CONFIG # 'title_extractor': TITLE_EXTRACTOR_CONFIG
# } # }
@abx.hookimpl
def get_EXTRACTORS():
from .extractors import TITLE_EXTRACTOR
return {
'title': TITLE_EXTRACTOR,
}

View file

@ -4,7 +4,7 @@ from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary from abx_pkg import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env from abx_plugin_default_binproviders import apt, brew, env

View file

@ -1,35 +1,37 @@
__package__ = 'abx_plugin_wget' __package__ = 'abx_plugin_wget'
# from pathlib import Path from pathlib import Path
# from pydantic_pkgr import BinName from abx_pkg import BinName
# from .binaries import WGET_BINARY from abx_spec_extractor import BaseExtractor, ExtractorName
# from .wget_util import wget_output_path
# class WgetExtractor(BaseExtractor): from .binaries import WGET_BINARY
# name: ExtractorName = 'wget' from .wget_util import wget_output_path
# binary: BinName = WGET_BINARY.name
# def get_output_path(self, snapshot) -> Path | None: class WgetExtractor(BaseExtractor):
# wget_index_path = wget_output_path(snapshot.as_link()) name: ExtractorName = 'wget'
# if wget_index_path: binary: BinName = WGET_BINARY.name
# return Path(wget_index_path)
# return None
# WGET_EXTRACTOR = WgetExtractor() def get_output_path(self, snapshot) -> Path | None:
wget_index_path = wget_output_path(snapshot.as_link())
if wget_index_path:
return Path(wget_index_path)
return None
WGET_EXTRACTOR = WgetExtractor()
# class WarcExtractor(BaseExtractor): class WarcExtractor(BaseExtractor):
# name: ExtractorName = 'warc' name: ExtractorName = 'warc'
# binary: BinName = WGET_BINARY.name binary: BinName = WGET_BINARY.name
# def get_output_path(self, snapshot) -> Path | None: def get_output_path(self, snapshot) -> Path | None:
# warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz')) warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
# if warc_files: if warc_files:
# return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0] return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
# return None return None
# WARC_EXTRACTOR = WarcExtractor() WARC_EXTRACTOR = WarcExtractor()

View file

@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
] ]
[build-system] [build-system]

View file

@ -4,7 +4,7 @@ import subprocess
from typing import List from typing import List
from pydantic import InstanceOf from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, Binary from abx_pkg import BinProvider, BinName, BinaryOverrides, Binary
from abx_plugin_default_binproviders import apt, brew, env from abx_plugin_default_binproviders import apt, brew, env
from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER

View file

@ -1,4 +1,4 @@
__package__ = 'plugins_extractor.ytdlp' __package__ = 'abx_plugin_ytdlp'
from typing import List from typing import List

View file

@ -7,8 +7,8 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.0",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.5.4",
] ]
[build-system] [build-system]

View file

@ -11,13 +11,13 @@ from typing import cast
import abx import abx
from abx_spec_config import ConfigPluginSpec from abx_spec_config import ConfigPluginSpec
from abx_spec_pydantic_pkgr import PydanticPkgrPluginSpec from abx_spec_abx_pkg import AbxPkgPluginSpec
from abx_spec_django import DjangoPluginSpec from abx_spec_django import DjangoPluginSpec
from abx_spec_searchbackend import SearchBackendPluginSpec from abx_spec_searchbackend import SearchBackendPluginSpec
class ArchiveBoxPluginSpec(ConfigPluginSpec, PydanticPkgrPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec): class ArchiveBoxPluginSpec(ConfigPluginSpec, AbxPkgPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
""" """
ArchiveBox plugins can use any of the hooks from the Config, PydanticPkgr, and Django plugin specs. ArchiveBox plugins can use any of the hooks from the Config, AbxPkg, and Django plugin specs.
""" """
pass pass

View file

@ -2,10 +2,12 @@ __order__ = 100
import os import os
from pathlib import Path from pathlib import Path
from typing import Dict, Any, cast from typing import Any, cast, TYPE_CHECKING
from benedict import benedict from benedict import benedict
if TYPE_CHECKING:
from archivebox.config.constants import ConstantsDict
import abx import abx
@ -13,38 +15,43 @@ from .base_configset import BaseConfigSet, ConfigKeyStr
class ConfigPluginSpec: class ConfigPluginSpec:
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_collection_config_path(self) -> Path: def get_collection_config_path() -> Path:
return Path(os.getcwd()) / "ArchiveBox.conf" return Path(os.getcwd()) / "ArchiveBox.conf"
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_system_config_path(self) -> Path: def get_system_config_path() -> Path:
return Path('~/.config/abx/abx.conf').expanduser() return Path('~/.config/abx/abx.conf').expanduser()
@staticmethod
@abx.hookspec @abx.hookspec
@abx.hookimpl @abx.hookimpl
def get_CONFIG(self) -> Dict[abx.PluginId, BaseConfigSet]: def get_CONFIG() -> dict[abx.PluginId, 'BaseConfigSet | ConstantsDict']:
from archivebox import CONSTANTS
"""Get the config for a single plugin -> {plugin_id: PluginConfigSet()}""" """Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
return { return {
# override this in your plugin to return your plugin's config, e.g. 'CONSTANTS': CONSTANTS,
# 'ytdlp': YtdlpConfig(...),
} }
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_CONFIGS(self) -> Dict[abx.PluginId, BaseConfigSet]: def get_CONFIGS() -> dict[abx.PluginId, BaseConfigSet]:
"""Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}""" """Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
return abx.as_dict(pm.hook.get_CONFIG()) return abx.as_dict(pm.hook.get_CONFIG())
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_FLAT_CONFIG(self) -> Dict[ConfigKeyStr, Any]: def get_FLAT_CONFIG() -> dict[ConfigKeyStr, Any]:
"""Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}""" """Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
return benedict({ return benedict({
key: value key: value
@ -52,9 +59,10 @@ class ConfigPluginSpec:
for key, value in benedict(configset).items() for key, value in benedict(configset).items()
}) })
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_SCOPE_CONFIG(self, extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> Dict[ConfigKeyStr, Any]: def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]:
"""Get the config as it applies to you right now, based on the current context""" """Get the config as it applies to you right now, based on the current context"""
return benedict({ return benedict({
**pm.hook.get_default_config(default=default), **pm.hook.get_default_config(default=default),
@ -69,35 +77,41 @@ class ConfigPluginSpec:
**(extra or {}), **(extra or {}),
}) })
@staticmethod
# @abx.hookspec(firstresult=True) # @abx.hookspec(firstresult=True)
# @abx.hookimpl # @abx.hookimpl
# def get_request_config(self, request) -> dict: # def get_request_config(request) -> dict:
# session = getattr(request, 'session', None) # session = getattr(request, 'session', None)
# return getattr(session, 'config', None) or {} # return getattr(session, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_archiveresult_config(self, archiveresult) -> Dict[ConfigKeyStr, Any]: def get_archiveresult_config(archiveresult) -> dict[ConfigKeyStr, Any]:
return getattr(archiveresult, 'config', None) or {} return getattr(archiveresult, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_snapshot_config(self, snapshot) -> Dict[ConfigKeyStr, Any]: def get_snapshot_config(snapshot) -> dict[ConfigKeyStr, Any]:
return getattr(snapshot, 'config', None) or {} return getattr(snapshot, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_crawl_config(self, crawl) -> Dict[ConfigKeyStr, Any]: def get_crawl_config(crawl) -> dict[ConfigKeyStr, Any]:
return getattr(crawl, 'config', None) or {} return getattr(crawl, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_user_config(self, user=None) -> Dict[ConfigKeyStr, Any]: def get_user_config(user=None) -> dict[ConfigKeyStr, Any]:
return getattr(user, 'config', None) or {} return getattr(user, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_collection_config(self, collection=...) -> Dict[ConfigKeyStr, Any]: def get_collection_config(collection=...) -> dict[ConfigKeyStr, Any]:
# ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file # ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file
# {} = empty dict, override to ignore the collection config # {} = empty dict, override to ignore the collection config
return benedict({ return benedict({
@ -106,9 +120,10 @@ class ConfigPluginSpec:
for key, value in configset.from_collection().items() for key, value in configset.from_collection().items()
}) if collection == ... else collection }) if collection == ... else collection
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_environment_config(self, environment=...) -> Dict[ConfigKeyStr, Any]: def get_environment_config(environment=...) -> dict[ConfigKeyStr, Any]:
# ... = ellipsis, means automatically get the environment config from the active environment variables # ... = ellipsis, means automatically get the environment config from the active environment variables
# {} = empty dict, override to ignore the environment config # {} = empty dict, override to ignore the environment config
return benedict({ return benedict({
@ -117,18 +132,20 @@ class ConfigPluginSpec:
for key, value in configset.from_environment().items() for key, value in configset.from_environment().items()
}) if environment == ... else environment }) if environment == ... else environment
@staticmethod
# @abx.hookspec(firstresult=True) # @abx.hookspec(firstresult=True)
# @abx.hookimpl # @abx.hookimpl
# def get_machine_config(self, machine=...) -> dict: # def get_machine_config(machine=...) -> dict:
# # ... = ellipsis, means automatically get the machine config from the currently executing machine # # ... = ellipsis, means automatically get the machine config from the currently executing machine
# # {} = empty dict, override to ignore the machine config # # {} = empty dict, override to ignore the machine config
# if machine == ...: # if machine == ...:
# machine = Machine.objects.get_current() # machine = Machine.objects.get_current()
# return getattr(machine, 'config', None) or {} # return getattr(machine, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True) @abx.hookspec(firstresult=True)
@abx.hookimpl @abx.hookimpl
def get_default_config(self, default=...) -> Dict[ConfigKeyStr, Any]: def get_default_config(default=...) -> dict[ConfigKeyStr, Any]:
# ... = ellipsis, means automatically get the machine config from the currently executing machine # ... = ellipsis, means automatically get the machine config from the currently executing machine
# {} = empty dict, override to ignore the machine config # {} = empty dict, override to ignore the machine config
return benedict({ return benedict({

View file

@ -1,4 +1,3 @@
__order__ = 300
import abx import abx
from typing import List, Dict, Any, cast from typing import List, Dict, Any, cast
@ -6,6 +5,8 @@ from typing import List, Dict, Any, cast
########################################################################################### ###########################################################################################
class DjangoPluginSpec: class DjangoPluginSpec:
__order__ = 10
@abx.hookspec @abx.hookspec
def get_INSTALLED_APPS() -> List[str]: def get_INSTALLED_APPS() -> List[str]:
return ['abx_spec_django'] return ['abx_spec_django']

View file

@ -1,10 +1,12 @@
__order__ = 10
import os import os
from typing import Optional, List, Annotated, Tuple from typing import Optional, List, Annotated, Tuple
from pathlib import Path from pathlib import Path
from pydantic import AfterValidator from pydantic import AfterValidator
from pydantic_pkgr import BinName from abx_pkg import BinName
import abx import abx
@ -23,7 +25,7 @@ CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_em
@abx.hookspec @abx.hookspec
@abx.hookimpl @abx.hookimpl
def get_EXTRACTORS(): def get_EXTRACTORS():
return [] return {}
@abx.hookspec @abx.hookspec
@abx.hookimpl @abx.hookimpl

View file

@ -1,114 +0,0 @@
__order__ = 200
import os
from typing import Dict, cast
from pathlib import Path
from pydantic_pkgr import Binary, BinProvider
import abx
from abx_spec_config import ConfigPluginSpec
###########################################################################################
class PydanticPkgrPluginSpec:
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_LIB_DIR(self) -> Path:
"""Get the directory where shared runtime libraries/dependencies should be installed"""
FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
LIB_DIR = Path(FLAT_CONFIG.get('LIB_DIR', '/usr/local/share/abx'))
return LIB_DIR
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_BIN_DIR(self) -> Path:
"""Get the directory where binaries should be symlinked to"""
FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
LIB_DIR = pm.hook.get_LIB_DIR()
BIN_DIR = Path(FLAT_CONFIG.get('BIN_DIR') or LIB_DIR / 'bin')
return BIN_DIR
@abx.hookspec
@abx.hookimpl
def get_BINPROVIDERS(self) -> Dict[str, BinProvider]:
return {
# to be implemented by plugins, e.g.:
# 'npm': NpmBinProvider(npm_prefix=Path('/usr/local/share/abx/npm')),
}
@abx.hookspec
@abx.hookimpl
def get_BINARIES(self) -> Dict[str, Binary]:
return {
# to be implemented by plugins, e.g.:
# 'yt-dlp': Binary(name='yt-dlp', binproviders=[npm]),
}
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_BINPROVIDER(self, binprovider_name: str) -> BinProvider:
"""Get a specific BinProvider by name"""
return abx.as_dict(pm.hook.get_BINPROVIDERS())[binprovider_name]
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_BINARY(self, bin_name: str) -> Binary:
"""Get a specific Binary by name"""
return abx.as_dict(pm.hook.get_BINARIES())[bin_name]
@abx.hookspec(firstresult=True)
@abx.hookimpl
def binary_load(self, binary: Binary, **kwargs) -> Binary:
"""Load a binary from the filesystem (override to load a binary from a different source, e.g. DB, cache, etc.)"""
loaded_binary = binary.load(**kwargs)
pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
return loaded_binary
@abx.hookspec(firstresult=True)
@abx.hookimpl
def binary_install(self, binary: Binary, **kwargs) -> Binary:
"""Override to change how a binary is installed (e.g. by downloading from a remote source, etc.)"""
loaded_binary = binary.install(**kwargs)
pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
return loaded_binary
@abx.hookspec(firstresult=True)
@abx.hookimpl
def binary_load_or_install(self, binary: Binary, **kwargs) -> Binary:
"""Override to change how a binary is loaded or installed (e.g. by downloading from a remote source, etc.)"""
loaded_binary = binary.load_or_install(**kwargs)
pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
return loaded_binary
@abx.hookspec(firstresult=True)
@abx.hookimpl
def binary_symlink_to_bin_dir(self, binary: Binary, bin_dir: Path | None=None):
if not (binary.abspath and os.path.isfile(binary.abspath)):
return
BIN_DIR = pm.hook.get_BIN_DIR()
try:
BIN_DIR.mkdir(parents=True, exist_ok=True)
symlink = BIN_DIR / binary.name
symlink.unlink(missing_ok=True)
symlink.symlink_to(binary.abspath)
symlink.chmod(0o777) # make sure its executable by everyone
except Exception:
# print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
# not actually needed, we can just run without it
pass
PLUGIN_SPEC = PydanticPkgrPluginSpec
class RequiredSpecsAvailable(ConfigPluginSpec, PydanticPkgrPluginSpec):
pass
TypedPluginManager = abx.ABXPluginManager[RequiredSpecsAvailable]
pm = cast(TypedPluginManager, abx.pm)

View file

@ -1,17 +0,0 @@
[project]
name = "abx-spec-pydantic-pkgr"
version = "0.1.0"
description = "The ABX plugin specification for Binaries and BinProviders"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_spec_pydantic_pkgr = "abx_spec_pydantic_pkgr"

View file

@ -25,6 +25,9 @@ class BaseSearchBackend(abc.ABC):
class SearchBackendPluginSpec: class SearchBackendPluginSpec:
__order__ = 10
@staticmethod
@abx.hookspec @abx.hookspec
@abx.hookimpl @abx.hookimpl
def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]: def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]:

View file

@ -244,10 +244,12 @@ def get_plugin_order(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int,
except FileNotFoundError: except FileNotFoundError:
pass pass
default_order = 10 if '_spec_' in str(plugin_dir).lower() else 999
if plugin_module: if plugin_module:
order = getattr(plugin_module, '__order__', 999) order = getattr(plugin_module, '__order__', default_order)
else: else:
order = 999 order = default_order
assert order is not None assert order is not None
assert plugin_dir assert plugin_dir
@ -270,7 +272,10 @@ def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo:
elif inspect.isclass(plugin): elif inspect.isclass(plugin):
module = inspect.getmodule(plugin) module = inspect.getmodule(plugin)
else: else:
raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}') plugin = type(plugin)
module = inspect.getmodule(plugin)
# raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
assert module assert module
@ -416,9 +421,14 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
PLUGINS_TO_LOAD = [] PLUGINS_TO_LOAD = []
LOADED_PLUGINS = {} LOADED_PLUGINS = {}
for plugin in plugins: plugin_infos = sorted([
plugin_info = get_plugin(plugin) get_plugin(plugin)
assert plugin_info, f'No plugin metadata found for {plugin}' for plugin in plugins
], key=lambda plugin: plugin.get('order', 999))
for plugin_info in plugin_infos:
assert plugin_info, 'No plugin metadata found for plugin'
assert 'id' in plugin_info and 'module' in plugin_info assert 'id' in plugin_info and 'module' in plugin_info
if plugin_info['module'] in pm.get_plugins(): if plugin_info['module'] in pm.get_plugins():
LOADED_PLUGINS[plugin_info['id']] = plugin_info LOADED_PLUGINS[plugin_info['id']] = plugin_info
@ -431,7 +441,7 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
for plugin_info in PLUGINS_TO_LOAD: for plugin_info in PLUGINS_TO_LOAD:
pm.register(plugin_info['module']) pm.register(plugin_info['module'])
LOADED_PLUGINS[plugin_info['id']] = plugin_info LOADED_PLUGINS[plugin_info['id']] = plugin_info
# print(f' √ Loaded plugin: {plugin_id}') print(f' √ Loaded plugin: {plugin_info["id"]}')
return benedict(LOADED_PLUGINS) return benedict(LOADED_PLUGINS)
@cache @cache

View file

@ -1,5 +1,6 @@
__package__ = 'archivebox.seeds' __package__ = 'archivebox.seeds'
__order__ = 100
import abx import abx

View file

@ -1,6 +1,6 @@
[project] [project]
name = "archivebox" name = "archivebox"
version = "0.8.6rc1" version = "0.8.6rc2"
requires-python = ">=3.10" requires-python = ">=3.10"
description = "Self-hosted internet archiving solution." description = "Self-hosted internet archiving solution."
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}] authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
@ -80,13 +80,13 @@ dependencies = [
"django-taggit==6.1.0", "django-taggit==6.1.0",
"base32-crockford==0.3.0", "base32-crockford==0.3.0",
"platformdirs>=4.3.6", "platformdirs>=4.3.6",
"pydantic-pkgr>=0.5.4", "abx-pkg>=0.6.0",
"pocket>=0.3.6", "pocket>=0.3.6",
"sonic-client>=1.0.0", "sonic-client>=1.0.0",
"yt-dlp>=2024.8.6", # for: media" "yt-dlp>=2024.8.6", # for: media"
############# Plugin Dependencies ################ ############# Plugin Dependencies ################
"abx>=0.1.0", "abx>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0", "abx-spec-abx-pkg>=0.1.1",
"abx-spec-config>=0.1.0", "abx-spec-config>=0.1.0",
"abx-spec-archivebox>=0.1.0", "abx-spec-archivebox>=0.1.0",
"abx-spec-django>=0.1.0", "abx-spec-django>=0.1.0",
@ -178,10 +178,10 @@ dev-dependencies = [
] ]
[tool.uv.sources] [tool.uv.sources]
# pydantic-pkgr = { workspace = true } # abx-pkg = { workspace = true }
abx = { workspace = true } abx = { workspace = true }
abx-spec-pydantic-pkgr = { workspace = true } abx-spec-abx-pkg = { workspace = true }
abx-spec-config = { workspace = true } abx-spec-config = { workspace = true }
abx-spec-archivebox = { workspace = true } abx-spec-archivebox = { workspace = true }
abx-spec-django = { workspace = true } abx-spec-django = { workspace = true }

View file

@ -1,6 +1,6 @@
# This file was autogenerated by uv via the following command: # This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml --all-extras -o requirements.txt # uv pip compile pyproject.toml --all-extras -o requirements.txt
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx-plugin-archivedotorg # abx-plugin-archivedotorg
@ -24,65 +24,65 @@
# abx-plugin-title # abx-plugin-title
# abx-plugin-wget # abx-plugin-wget
# abx-plugin-ytdlp # abx-plugin-ytdlp
# abx-spec-abx-pkg
# abx-spec-archivebox # abx-spec-archivebox
# abx-spec-config # abx-spec-config
# abx-spec-django # abx-spec-django
# abx-spec-extractor # abx-spec-extractor
# abx-spec-pydantic-pkgr
# abx-spec-searchbackend # abx-spec-searchbackend
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-archivedotorg -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-archivedotorg
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-chrome -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-chrome
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-curl -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-curl
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx-plugin-archivedotorg # abx-plugin-archivedotorg
# abx-plugin-favicon # abx-plugin-favicon
# abx-plugin-title # abx-plugin-title
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-default-binproviders -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-default-binproviders
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx-plugin-git # abx-plugin-git
# abx-plugin-npm # abx-plugin-npm
# abx-plugin-pip # abx-plugin-pip
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-favicon -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-favicon
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-git -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-git
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-htmltotext -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-htmltotext
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ldap-auth -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ldap-auth
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-mercury -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-mercury
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-npm -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-npm
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-pip -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-pip
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-playwright -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-playwright
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-puppeteer -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-puppeteer
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-readability -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-readability
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ripgrep-search -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ripgrep-search
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-singlefile -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-singlefile
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sonic-search -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sonic-search
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sqlitefts-search -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sqlitefts-search
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-title -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-title
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-wget -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-wget
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ytdlp -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ytdlp
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-archivebox -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-archivebox
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-config -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-config
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx-plugin-archivedotorg # abx-plugin-archivedotorg
@ -105,13 +105,13 @@
# abx-plugin-title # abx-plugin-title
# abx-plugin-wget # abx-plugin-wget
# abx-plugin-ytdlp # abx-plugin-ytdlp
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-django -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-django
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx-plugin-ldap-auth # abx-plugin-ldap-auth
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-extractor -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-extractor
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-pydantic-pkgr -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-abx-pkg
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx-plugin-chrome # abx-plugin-chrome
@ -126,12 +126,24 @@
# abx-plugin-sonic-search # abx-plugin-sonic-search
# abx-plugin-wget # abx-plugin-wget
# abx-plugin-ytdlp # abx-plugin-ytdlp
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-searchbackend -e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-searchbackend
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx-plugin-ripgrep-search # abx-plugin-ripgrep-search
# abx-plugin-sonic-search # abx-plugin-sonic-search
# abx-plugin-sqlitefts-search # abx-plugin-sqlitefts-search
abx-pkg==0.6.0
# via
# archivebox (pyproject.toml)
# abx-plugin-default-binproviders
# abx-plugin-npm
# abx-plugin-pip
# abx-plugin-playwright
# abx-plugin-puppeteer
# abx-plugin-singlefile
# abx-plugin-sonic-search
# abx-plugin-ytdlp
# abx-spec-abx-pkg
annotated-types==0.7.0 annotated-types==0.7.0
# via pydantic # via pydantic
anyio==4.6.2.post1 anyio==4.6.2.post1
@ -159,11 +171,9 @@ base32-crockford==0.3.0
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
# via python-benedict # via python-benedict
brotli==1.1.0 bx-django-utils==82
# via yt-dlp
bx-django-utils==81
# via django-huey-monitor # via django-huey-monitor
bx-py-utils==105 bx-py-utils==106
# via # via
# bx-django-utils # bx-django-utils
# django-huey-monitor # django-huey-monitor
@ -172,10 +182,9 @@ certifi==2024.8.30
# httpcore # httpcore
# httpx # httpx
# requests # requests
# yt-dlp
cffi==1.17.1 cffi==1.17.1
# via cryptography # via cryptography
channels==4.1.0 channels==4.2.0
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
charset-normalizer==3.4.0 charset-normalizer==3.4.0
# via requests # via requests
@ -197,7 +206,7 @@ decorator==5.1.1
# via # via
# ipdb # ipdb
# ipython # ipython
django==5.1.2 django==5.1.3
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx # abx
@ -270,7 +279,7 @@ ftfy==6.3.1
# via python-benedict # via python-benedict
h11==0.14.0 h11==0.14.0
# via httpcore # via httpcore
httpcore==1.0.6 httpcore==1.0.7
# via httpx # via httpx
httpx==0.27.2 httpx==0.27.2
# via django-signal-webhooks # via django-signal-webhooks
@ -297,7 +306,7 @@ ipython==8.29.0
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# ipdb # ipdb
jedi==0.19.1 jedi==0.19.2
# via ipython # via ipython
libcst==1.5.0 libcst==1.5.0
# via django-autotyping # via django-autotyping
@ -309,8 +318,6 @@ matplotlib-inline==0.1.7
# via ipython # via ipython
mdurl==0.1.2 mdurl==0.1.2
# via markdown-it-py # via markdown-it-py
mutagen==1.47.0
# via yt-dlp
mypy-extensions==1.0.0 mypy-extensions==1.0.0
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
openpyxl==3.1.5 openpyxl==3.1.5
@ -319,12 +326,14 @@ parso==0.8.4
# via jedi # via jedi
pexpect==4.9.0 pexpect==4.9.0
# via ipython # via ipython
phonenumbers==8.13.48 phonenumbers==8.13.50
# via python-benedict # via python-benedict
pip==24.3.1
# via abx-pkg
platformdirs==4.3.6 platformdirs==4.3.6
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# pydantic-pkgr # abx-pkg
pluggy==1.5.0 pluggy==1.5.0
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
@ -352,34 +361,20 @@ pyasn1-modules==0.4.1
# service-identity # service-identity
pycparser==2.22 pycparser==2.22
# via cffi # via cffi
pycryptodomex==3.21.0
# via yt-dlp
pydantic==2.9.2 pydantic==2.9.2
# via # via
# abx-pkg
# abx-plugin-playwright # abx-plugin-playwright
# abx-spec-config # abx-spec-config
# abx-spec-extractor # abx-spec-extractor
# abx-spec-searchbackend # abx-spec-searchbackend
# django-ninja # django-ninja
# django-pydantic-field # django-pydantic-field
# pydantic-pkgr
# pydantic-settings # pydantic-settings
pydantic-core==2.23.4 pydantic-core==2.23.4
# via # via
# abx-pkg
# pydantic # pydantic
# pydantic-pkgr
pydantic-pkgr==0.5.4
# via
# archivebox (pyproject.toml)
# abx-plugin-default-binproviders
# abx-plugin-npm
# abx-plugin-pip
# abx-plugin-playwright
# abx-plugin-puppeteer
# abx-plugin-singlefile
# abx-plugin-sonic-search
# abx-plugin-ytdlp
# abx-spec-pydantic-pkgr
pydantic-settings==2.6.1 pydantic-settings==2.6.1
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
@ -414,6 +409,8 @@ python-ldap==3.4.4
# django-auth-ldap # django-auth-ldap
python-slugify==8.0.4 python-slugify==8.0.4
# via python-benedict # via python-benedict
python-statemachine==2.4.0
# via archivebox (pyproject.toml)
python-stdnum==1.20 python-stdnum==1.20
# via bx-django-utils # via bx-django-utils
pytz==2024.2 pytz==2024.2
@ -424,14 +421,13 @@ pyyaml==6.0.2
# via # via
# libcst # libcst
# python-benedict # python-benedict
regex==2024.9.11 regex==2024.11.6
# via dateparser # via dateparser
requests==2.32.3 requests==2.32.3
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# pocket # pocket
# python-benedict # python-benedict
# yt-dlp
requests-tracker==0.3.3 requests-tracker==0.3.3
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
rich==13.9.4 rich==13.9.4
@ -443,7 +439,7 @@ rich-argparse==1.6.0
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
service-identity==24.2.0 service-identity==24.2.0
# via twisted # via twisted
setuptools==75.3.0 setuptools==75.5.0
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# autobahn # autobahn
@ -464,7 +460,7 @@ sonic-client==1.0.0
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
soupsieve==2.6 soupsieve==2.6
# via beautifulsoup4 # via beautifulsoup4
sqlparse==0.5.1 sqlparse==0.5.2
# via # via
# django # django
# django-debug-toolbar # django-debug-toolbar
@ -492,12 +488,12 @@ types-pyyaml==6.0.12.20240917
typing-extensions==4.12.2 typing-extensions==4.12.2
# via # via
# archivebox (pyproject.toml) # archivebox (pyproject.toml)
# abx-pkg
# django-pydantic-field # django-pydantic-field
# django-stubs # django-stubs
# django-stubs-ext # django-stubs-ext
# pydantic # pydantic
# pydantic-core # pydantic-core
# pydantic-pkgr
# twisted # twisted
tzdata==2024.2 tzdata==2024.2
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
@ -506,9 +502,7 @@ tzlocal==5.2
ulid-py==1.1.0 ulid-py==1.1.0
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
urllib3==2.2.3 urllib3==2.2.3
# via # via requests
# requests
# yt-dlp
uuid6==2024.7.10 uuid6==2024.7.10
# via typeid-python # via typeid-python
w3lib==2.2.1 w3lib==2.2.1
@ -517,13 +511,11 @@ wcwidth==0.2.13
# via # via
# ftfy # ftfy
# prompt-toolkit # prompt-toolkit
websockets==13.1
# via yt-dlp
xlrd==2.0.1 xlrd==2.0.1
# via python-benedict # via python-benedict
xmltodict==0.14.2 xmltodict==0.14.2
# via python-benedict # via python-benedict
yt-dlp==2024.10.22 yt-dlp==2024.11.4
# via archivebox (pyproject.toml) # via archivebox (pyproject.toml)
zope-interface==7.1.1 zope-interface==7.1.1
# via twisted # via twisted

856
uv.lock

File diff suppressed because it is too large Load diff