fix plugin loading order, admin, abx-pkg

Nick Sweeting 2024-11-16 06:43:06 -08:00
parent 210fd935d7
commit c8e186f21b
78 changed files with 801 additions and 987 deletions

View file

@@ -5,12 +5,10 @@ __command__ = 'archivebox update'
import sys
import argparse
from pathlib import Path
from typing import List, Optional, IO
from archivebox.misc.util import docstring
from archivebox.config import DATA_DIR
from ..index import (
from archivebox.index import (
LINK_FILTERS,
get_indexed_folders,
get_archived_folders,
@@ -23,8 +21,16 @@ from ..index import (
get_corrupted_folders,
get_unrecognized_folders,
)
from ..logging_util import SmartFormatter, accept_stdin
from ..main import update
from archivebox.logging_util import SmartFormatter, accept_stdin
# from ..main import update
def update():
from archivebox.config.django import setup_django
setup_django()
from actors.orchestrator import Orchestrator
orchestrator = Orchestrator()
orchestrator.start()
@docstring(update.__doc__)
@@ -116,20 +122,22 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
if not command.filter_patterns:
filter_patterns_str = accept_stdin(stdin)
update(
resume=command.resume,
only_new=command.only_new,
index_only=command.index_only,
overwrite=command.overwrite,
filter_patterns_str=filter_patterns_str,
filter_patterns=command.filter_patterns,
filter_type=command.filter_type,
status=command.status,
after=command.after,
before=command.before,
out_dir=Path(pwd) if pwd else DATA_DIR,
extractors=command.extract,
)
update()
# update(
# resume=command.resume,
# only_new=command.only_new,
# index_only=command.index_only,
# overwrite=command.overwrite,
# filter_patterns_str=filter_patterns_str,
# filter_patterns=command.filter_patterns,
# filter_type=command.filter_type,
# status=command.status,
# after=command.after,
# before=command.before,
# out_dir=Path(pwd) if pwd else DATA_DIR,
# extractors=command.extract,
# )
if __name__ == '__main__':
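A minimal usage sketch of the new orchestrator-driven flow above (helper name illustrative; Orchestrator API as shown in the hunk):

def run_update() -> None:
    # Django must be configured before any model imports or ORM access
    from archivebox.config.django import setup_django
    setup_django()
    # per this commit, update work is drained by actors instead of a direct update() call
    from actors.orchestrator import Orchestrator
    Orchestrator().start()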

View file

@@ -1,4 +1,4 @@
__package__ = 'config'
__package__ = 'archivebox.config'
__order__ = 200
from .paths import (

View file

@@ -120,6 +120,8 @@ class ArchivingConfig(BaseConfigSet):
SAVE_ALLOWLIST: Dict[str, List[str]] = Field(default={}) # mapping of regex patterns to list of archive methods
SAVE_DENYLIST: Dict[str, List[str]] = Field(default={})
DEFAULT_PERSONA: str = Field(default='Default')
# GIT_DOMAINS: str = Field(default='github.com,bitbucket.org,gitlab.com,gist.github.com,codeberg.org,gitea.com,git.sr.ht')
# WGET_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' wget/{WGET_VERSION}')
# CURL_USER_AGENT: str = Field(default=lambda c: c['USER_AGENT'] + ' curl/{CURL_VERSION}')

View file

@@ -86,10 +86,11 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
}
for plugin_id, plugin in abx.get_all_plugins().items():
if not plugin.hooks.get('get_BINARIES'):
plugin = benedict(plugin)
if not hasattr(plugin.plugin, 'get_BINARIES'):
continue
for binary in plugin.hooks.get_BINARIES().values():
for binary in plugin.plugin.get_BINARIES().values():
try:
installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
binary = installed_binary.load_from_db()
@@ -214,9 +215,9 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
return 'black'
for plugin_id, plugin in abx.get_all_plugins().items():
plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
plugin.hooks.get_BINPROVIDERS = getattr(plugin.plugin, 'get_BINPROVIDERS', lambda: {})
plugin.hooks.get_BINARIES = getattr(plugin.plugin, 'get_BINARIES', lambda: {})
plugin.hooks.get_CONFIG = getattr(plugin.plugin, 'get_CONFIG', lambda: {})
rows['Label'].append(ItemLink(plugin.label, key=plugin.package))
rows['Version'].append(str(plugin.version))
@@ -251,8 +252,10 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
plugins = abx.get_all_plugins()
plugin_id = None
for check_plugin_id, loaded_plugin in settings.PLUGINS.items():
for check_plugin_id, loaded_plugin in plugins.items():
if check_plugin_id.split('.')[-1] == key.split('.')[-1]:
plugin_id = check_plugin_id
break
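The hunks above share one pattern: hook lookups move from plugin.hooks.get(...) to getattr(plugin.plugin, ...) with a no-op fallback, so the admin views never crash on plugins that don't implement a given hook. A hedged sketch of that pattern (helper name illustrative):

import abx
from benedict import benedict

def iter_plugin_binaries():
    # yield (plugin_id, binary) pairs, skipping plugins without a get_BINARIES hook
    for plugin_id, plugin in abx.get_all_plugins().items():
        plugin = benedict(plugin)  # attribute-style access, as in the view above
        get_binaries = getattr(plugin.plugin, 'get_BINARIES', lambda: {})
        for binary in get_binaries().values():
            yield plugin_id, binary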

View file

@@ -1,5 +1,5 @@
__package__ = 'archivebox.core'
__order__ = 100
import abx
@abx.hookimpl

View file

@@ -21,7 +21,7 @@ class SnapshotActor(ActorType[Snapshot]):
FINAL_STATES: ClassVar[list[State]] = SnapshotMachine.final_states # ['sealed']
STATE_FIELD_NAME: ClassVar[str] = Snapshot.state_field_name # status
MAX_CONCURRENT_ACTORS: ClassVar[int] = 3
MAX_CONCURRENT_ACTORS: ClassVar[int] = 1 # 3
MAX_TICK_TIME: ClassVar[int] = 10
CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10
@@ -39,7 +39,7 @@ class ArchiveResultActor(ActorType[ArchiveResult]):
FINAL_STATES: ClassVar[list[State]] = ArchiveResultMachine.final_states # ['succeeded', 'failed', 'skipped']
STATE_FIELD_NAME: ClassVar[str] = ArchiveResult.state_field_name # status
MAX_CONCURRENT_ACTORS: ClassVar[int] = 6
MAX_CONCURRENT_ACTORS: ClassVar[int] = 1 # 6
MAX_TICK_TIME: ClassVar[int] = 60
CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10

View file

@@ -39,7 +39,7 @@ class ArchiveResultInline(admin.TabularInline):
extra = 0
sort_fields = ('end_ts', 'extractor', 'output', 'status', 'cmd_version')
readonly_fields = ('id', 'result_id', 'completed', 'command', 'version')
fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'output')
fields = ('start_ts', 'end_ts', *readonly_fields, 'extractor', 'cmd', 'cmd_version', 'pwd', 'created_by', 'status', 'retry_at', 'output')
# exclude = ('id',)
ordering = ('end_ts',)
show_change_link = True
@@ -105,11 +105,11 @@ class ArchiveResultInline(admin.TabularInline):
class ArchiveResultAdmin(ABIDModelAdmin):
list_display = ('start_ts', 'snapshot_info', 'tags_str', 'extractor', 'cmd_str', 'status', 'output_str')
sort_fields = ('start_ts', 'extractor', 'status')
list_display = ('abid', 'created_by', 'created_at', 'snapshot_info', 'tags_str', 'status', 'extractor', 'cmd_str', 'output_str')
sort_fields = ('abid', 'created_by', 'created_at', 'extractor', 'status')
readonly_fields = ('cmd_str', 'snapshot_info', 'tags_str', 'created_at', 'modified_at', 'abid_info', 'output_summary')
search_fields = ('id', 'abid', 'snapshot__url', 'extractor', 'output', 'cmd_version', 'cmd', 'snapshot__timestamp')
fields = ('snapshot', 'extractor', 'status', 'output', 'pwd', 'start_ts', 'end_ts', 'created_by', 'cmd_version', 'cmd', *readonly_fields)
fields = ('snapshot', 'extractor', 'status', 'retry_at', 'start_ts', 'end_ts', 'created_by', 'pwd', 'cmd_version', 'cmd', 'output', *readonly_fields)
autocomplete_fields = ['snapshot']
list_filter = ('status', 'extractor', 'start_ts', 'cmd_version')
@@ -169,7 +169,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
result.output,
)
output_str += format_html('<a href="/archive/{}/index.html#all">See result files ...</a><br/><pre><code>', str(result.snapshot.timestamp))
path_from_output_str = (snapshot_dir / result.output)
path_from_output_str = (snapshot_dir / (result.output or ''))
output_str += format_html('<i style="padding: 1px">{}</i><b style="padding-right: 20px">/</b><i>{}</i><br/><hr/>', str(snapshot_dir), str(result.output))
if os.access(path_from_output_str, os.R_OK):
root_dir = str(path_from_output_str)

View file

@@ -56,12 +56,12 @@ class SnapshotActionForm(ActionForm):
class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
list_display = ('created_at', 'title_str', 'files', 'size', 'url_str', 'crawl')
sort_fields = ('title_str', 'url_str', 'created_at', 'crawl')
list_display = ('created_at', 'title_str', 'status', 'files', 'size', 'url_str')
sort_fields = ('title_str', 'url_str', 'created_at', 'status', 'crawl')
readonly_fields = ('admin_actions', 'status_info', 'tags_str', 'imported_timestamp', 'created_at', 'modified_at', 'downloaded_at', 'abid_info', 'link_dir')
search_fields = ('id', 'url', 'abid', 'timestamp', 'title', 'tags__name')
list_filter = ('created_at', 'downloaded_at', 'archiveresult__status', 'created_by', 'tags__name')
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'crawl', *readonly_fields)
fields = ('url', 'title', 'created_by', 'bookmarked_at', 'status', 'retry_at', 'crawl', *readonly_fields)
ordering = ['-created_at']
actions = ['add_tags', 'remove_tags', 'update_titles', 'update_snapshots', 'resnapshot_snapshot', 'overwrite_snapshots', 'delete_snapshots']
inlines = [TagInline, ArchiveResultInline]

View file

@@ -1,7 +1,7 @@
__package__ = 'archivebox.core'
from typing import Optional, Dict, Iterable
from typing import Optional, Dict, Iterable, Any
from django_stubs_ext.db.models import TypedModelMeta
import os
@@ -20,20 +20,22 @@ from django.db.models import Case, When, Value, IntegerField
from django.contrib import admin
from django.conf import settings
from actors.models import ModelWithStateMachine
import abx
from archivebox.config import CONSTANTS
from abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField
from actors.models import ModelWithStateMachine
from queues.tasks import bg_archive_snapshot
from crawls.models import Crawl
# from machine.models import Machine, NetworkInterface
from archivebox.misc.system import get_dir_size
from archivebox.misc.util import parse_date, base_url
from ..index.schema import Link
from ..index.html import snapshot_icons
from ..extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE, EXTRACTORS
from archivebox.index.schema import Link
from archivebox.index.html import snapshot_icons
from archivebox.extractors import ARCHIVE_METHODS_INDEXING_PRECEDENCE
# class BaseModel(models.Model):
@@ -195,13 +197,21 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
tags = models.ManyToManyField(Tag, blank=True, through=SnapshotTag, related_name='snapshot_set', through_fields=('snapshot', 'tag'))
title = models.CharField(max_length=512, null=True, blank=True, db_index=True)
keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at')
# config = models.JSONField(default=dict, null=False, blank=False, editable=True)
keys = ('url', 'timestamp', 'title', 'tags', 'downloaded_at', 'created_at', 'status', 'retry_at', 'abid', 'id')
archiveresult_set: models.Manager['ArchiveResult']
objects = SnapshotManager()
def save(self, *args, **kwargs):
if self.pk:
existing_snapshot = self.__class__.objects.filter(pk=self.pk).first()
if existing_snapshot and existing_snapshot.status == self.StatusChoices.SEALED:
if self.as_json() != existing_snapshot.as_json():
raise Exception(f'Snapshot {self.pk} is already sealed, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_snapshot.as_json()}')
if not self.bookmarked_at:
self.bookmarked_at = self.created_at or self._init_timestamp
@@ -427,7 +437,7 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
ALL_EXTRACTORS = ['favicon', 'title', 'screenshot', 'headers', 'singlefile', 'dom', 'git', 'archive_org', 'readability', 'mercury', 'pdf', 'wget']
# config = get_scope_config(snapshot=self)
config = {'EXTRACTORS': ''}
config = {'EXTRACTORS': ','.join(ALL_EXTRACTORS)}
if config.get('EXTRACTORS', 'auto') == 'auto':
EXTRACTORS = ALL_EXTRACTORS
@@ -438,10 +448,13 @@ class Snapshot(ABIDModel, ModelWithStateMachine):
for extractor in EXTRACTORS:
if not extractor:
continue
archiveresult, _created = ArchiveResult.objects.get_or_create(
archiveresult, _created = ArchiveResult.objects.update_or_create(
snapshot=self,
extractor=extractor,
status=ArchiveResult.INITIAL_STATE,
defaults={
'retry_at': timezone.now(),
},
)
archiveresults.append(archiveresult)
return archiveresults
@@ -560,6 +573,8 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
# uplink = models.ForeignKey(NetworkInterface, on_delete=models.SET_NULL, null=True, blank=True, verbose_name='Network Interface Used')
objects = ArchiveResultManager()
keys = ('snapshot_id', 'extractor', 'cmd', 'pwd', 'cmd_version', 'output', 'start_ts', 'end_ts', 'created_at', 'status', 'retry_at', 'abid', 'id')
class Meta(TypedModelMeta):
verbose_name = 'Archive Result'
@@ -576,6 +591,16 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
def __str__(self):
return repr(self)
def save(self, *args, **kwargs):
# if (self.pk and self.__class__.objects.filter(pk=self.pk).values_list('status', flat=True)[0] in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]):
# raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further.')
if self.pk:
existing_archiveresult = self.__class__.objects.filter(pk=self.pk).first()
if existing_archiveresult and existing_archiveresult.status in [self.StatusChoices.FAILED, self.StatusChoices.SUCCEEDED, self.StatusChoices.SKIPPED]:
if self.as_json() != existing_archiveresult.as_json():
raise Exception(f'ArchiveResult {self.pk} is in a final state, it cannot be modified any further. NEW: {self.as_json()} != Existing: {existing_archiveresult.as_json()}')
super().save(*args, **kwargs)
# TODO: finish connecting machine.models
# @cached_property
@@ -603,36 +628,53 @@ class ArchiveResult(ABIDModel, ModelWithStateMachine):
return f'/{self.snapshot.archive_path}/{self.output_path()}'
@property
def extractor_module(self):
return EXTRACTORS[self.extractor]
def extractor_module(self) -> Any | None:
return abx.as_dict(abx.pm.hook.get_EXTRACTORS()).get(self.extractor, None)
def output_path(self) -> str:
def output_path(self) -> str | None:
"""return the canonical output filename or directory name within the snapshot dir"""
return self.extractor_module.get_output_path()
try:
return self.extractor_module.get_output_path(self.snapshot)
except Exception as e:
print(f'Error getting output path for {self.extractor} extractor: {e}')
return None
def embed_path(self) -> str:
def embed_path(self) -> str | None:
"""
return the actual runtime-calculated path to the file on-disk that
should be used for user-facing iframe embeds of this result
"""
if get_embed_path_func := getattr(self.extractor_module, 'get_embed_path', None):
return get_embed_path_func(self)
return self.extractor_module.get_output_path()
try:
return self.extractor_module.get_embed_path(self)
except Exception as e:
print(f'Error getting embed path for {self.extractor} extractor: {e}')
return None
def legacy_output_path(self):
link = self.snapshot.as_link()
return link.canonical_outputs().get(f'{self.extractor}_path')
def output_exists(self) -> bool:
return os.path.exists(self.output_path())
output_path = self.output_path()
return bool(output_path and os.path.exists(output_path))
def create_output_dir(self):
snap_dir = self.snapshot_dir
snap_dir = Path(self.snapshot_dir)
snap_dir.mkdir(parents=True, exist_ok=True)
return snap_dir / self.output_path()
output_path = self.output_path()
if output_path:
(snap_dir / output_path).mkdir(parents=True, exist_ok=True)
else:
raise ValueError(f'Not able to calculate output path for {self.extractor} extractor in {snap_dir}')
return snap_dir / output_path
def as_json(self, *args) -> dict:
args = args or self.keys
return {
key: getattr(self, key)
for key in args
}
# def get_storage_dir(self, create=True, symlink=True):
# date_str = self.snapshot.bookmarked_at.strftime('%Y%m%d')
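Both save() overrides above use the same guard shape; a condensed sketch (the status/as_json names come from the hunks, the helper itself is illustrative):

from django.db import models

def assert_not_final(instance: models.Model, final_states: tuple) -> None:
    # refuse to mutate a row that has already reached a final state
    if not instance.pk:
        return  # brand-new row, nothing to compare against
    existing = type(instance).objects.filter(pk=instance.pk).first()
    if existing and existing.status in final_states and instance.as_json() != existing.as_json():
        raise Exception(f'{instance.pk} is in a final state and cannot be modified further')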

View file

@@ -37,25 +37,44 @@ class SnapshotMachine(StateMachine, strict_states=True):
super().__init__(snapshot, *args, **kwargs)
def can_start(self) -> bool:
return self.snapshot.url
can_start = bool(self.snapshot.url and (self.snapshot.retry_at < timezone.now()))
if not can_start:
print(f'SnapshotMachine[{self.snapshot.ABID}].can_start() False: {self.snapshot.url} {self.snapshot.retry_at} {timezone.now()}')
return can_start
def is_finished(self) -> bool:
# if no archiveresults exist yet, it's not finished
if not self.snapshot.archiveresult_set.exists():
return False
# if archiveresults exist but are still pending, it's not finished
if self.snapshot.pending_archiveresults().exists():
return False
# otherwise archiveresults exist and are all finished, so it's finished
return True
@started.enter
def on_started(self):
print(f'SnapshotMachine[{self.snapshot.ABID}].on_started(): snapshot.create_pending_archiveresults() + snapshot.bump_retry_at(+60s)')
self.snapshot.create_pending_archiveresults()
self.snapshot.bump_retry_at(seconds=60)
def on_transition(self, event, state):
print(f'SnapshotMachine[{self.snapshot.ABID}].on_transition() {event} -> {state}')
@queued.enter
def enter_queued(self):
print(f'SnapshotMachine[{self.snapshot.ABID}].on_queued(): snapshot.retry_at = now()')
self.snapshot.status = Snapshot.StatusChoices.QUEUED
self.snapshot.retry_at = timezone.now()
self.snapshot.save()
@started.enter
def enter_started(self):
print(f'SnapshotMachine[{self.snapshot.ABID}].on_started(): snapshot.create_pending_archiveresults() + snapshot.bump_retry_at(+60s)')
self.snapshot.status = Snapshot.StatusChoices.STARTED
self.snapshot.bump_retry_at(seconds=60)
self.snapshot.save()
self.snapshot.create_pending_archiveresults()
@sealed.enter
def on_sealed(self):
def enter_sealed(self):
print(f'SnapshotMachine[{self.snapshot.ABID}].on_sealed(): snapshot.retry_at=None')
self.snapshot.status = Snapshot.StatusChoices.SEALED
self.snapshot.retry_at = None
self.snapshot.save()
@@ -95,7 +114,7 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
super().__init__(archiveresult, *args, **kwargs)
def can_start(self) -> bool:
return self.archiveresult.snapshot and self.archiveresult.snapshot.STATE == Snapshot.active_state
return self.archiveresult.snapshot and (self.archiveresult.retry_at < timezone.now())
def is_succeeded(self) -> bool:
return self.archiveresult.output_exists()
@@ -109,29 +128,45 @@ class ArchiveResultMachine(StateMachine, strict_states=True):
def is_finished(self) -> bool:
return self.is_failed() or self.is_succeeded()
@queued.enter
def enter_queued(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_queued(): archiveresult.retry_at = now()')
self.archiveresult.status = ArchiveResult.StatusChoices.QUEUED
self.archiveresult.retry_at = timezone.now()
self.archiveresult.save()
@started.enter
def on_started(self):
def enter_started(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_started(): archiveresult.start_ts + create_output_dir() + bump_retry_at(+60s)')
self.archiveresult.status = ArchiveResult.StatusChoices.STARTED
self.archiveresult.start_ts = timezone.now()
self.archiveresult.create_output_dir()
self.archiveresult.bump_retry_at(seconds=60)
self.archiveresult.save()
self.archiveresult.create_output_dir()
@backoff.enter
def on_backoff(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_backoff(): archiveresult.bump_retry_at(+60s)')
def enter_backoff(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_backoff(): archiveresult.retries += 1, archiveresult.bump_retry_at(+60s), archiveresult.end_ts = None')
self.archiveresult.status = ArchiveResult.StatusChoices.BACKOFF
self.archiveresult.retries = getattr(self.archiveresult, 'retries', 0) + 1
self.archiveresult.bump_retry_at(seconds=60)
self.archiveresult.end_ts = None
self.archiveresult.save()
@succeeded.enter
def on_succeeded(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_succeeded(): archiveresult.end_ts')
def enter_succeeded(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_succeeded(): archiveresult.retry_at = None, archiveresult.end_ts = now()')
self.archiveresult.status = ArchiveResult.StatusChoices.SUCCEEDED
self.archiveresult.retry_at = None
self.archiveresult.end_ts = timezone.now()
self.archiveresult.save()
@failed.enter
def on_failed(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_failed(): archiveresult.end_ts')
def enter_failed(self):
print(f'ArchiveResultMachine[{self.archiveresult.ABID}].on_failed(): archiveresult.retry_at = None, archiveresult.end_ts = now()')
self.archiveresult.status = ArchiveResult.StatusChoices.FAILED
self.archiveresult.retry_at = None
self.archiveresult.end_ts = timezone.now()
self.archiveresult.save()
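Every enter_* handler above follows one contract: stamp status, adjust retry_at, then save, so actors can re-claim work purely by timestamp. A generic sketch (the helper is illustrative, not part of this commit):

from datetime import timedelta
from django.utils import timezone

def enter_state(obj, status: str, retry_delay_secs: int | None) -> None:
    obj.status = status
    if retry_delay_secs is None:
        obj.retry_at = None  # final state: never re-claimed by an actor
    else:
        obj.retry_at = timezone.now() + timedelta(seconds=retry_delay_secs)
    obj.save()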

View file

@@ -102,7 +102,8 @@ class SnapshotView(View):
# iterate through all the files in the snapshot dir and add the biggest ones to the result list
snap_dir = Path(snapshot.link_dir)
assert os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK)
if not os.path.isdir(snap_dir) and os.access(snap_dir, os.R_OK):
return {}
for result_file in (*snap_dir.glob('*'), *snap_dir.glob('*/*')):
extension = result_file.suffix.lstrip('.').lower()
@@ -504,7 +505,7 @@ def find_config_section(key: str) -> str:
if key in CONSTANTS_CONFIG:
return 'CONSTANT'
matching_sections = [
section_id for section_id, section in CONFIGS.items() if key in section.model_fields
section_id for section_id, section in CONFIGS.items() if key in dict(section)
]
section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section
@@ -518,8 +519,9 @@ def find_config_default(key: str) -> str:
default_val = None
for config in CONFIGS.values():
if key in config.model_fields:
default_val = config.model_fields[key].default
if key in dict(config):
default_field = getattr(config, 'model_fields', dict(config))[key]
default_val = default_field.default if hasattr(default_field, 'default') else default_field
break
if isinstance(default_val, Callable):
@@ -529,7 +531,6 @@
else:
default_val = str(default_val)
return default_val
def find_config_type(key: str) -> str:
@@ -567,7 +568,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
}
for section_id, section in reversed(list(CONFIGS.items())):
for key, field in section.model_fields.items():
for key in dict(section).keys():
rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
@@ -580,7 +581,7 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
for key in CONSTANTS_CONFIG.keys():
rows['Section'].append(section) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', repr(CONSTANTS_CONFIG[key]))))
rows['Type'].append(format_html('<code>{}</code>', getattr(type(CONSTANTS_CONFIG[key]), '__name__', str(CONSTANTS_CONFIG[key]))))
rows['Value'].append(format_html('<code>{}</code>', CONSTANTS_CONFIG[key]) if key_is_safe(key) else '******** (redacted)')
rows['Default'].append(mark_safe(f'<a href="https://github.com/search?q=repo%3AArchiveBox%2FArchiveBox+path%3Aconfig+{key}&type=code"><code style="text-decoration: underline">{find_config_default(key) or "See here..."}</code></a>'))
# rows['Documentation'].append(mark_safe(f'Wiki: <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#{key.lower()}">{key}</a>'))
@@ -642,13 +643,13 @@ def live_config_value_view(request: HttpRequest, key: str, **kwargs) -> ItemCont
<code>{find_config_default(key) or '↗️ See in ArchiveBox source code...'}</code>
</a>
<br/><br/>
<p style="display: {"block" if key in FLAT_CONFIG else "none"}">
<p style="display: {"block" if key in FLAT_CONFIG and key not in CONSTANTS_CONFIG else "none"}">
<i>To change this value, edit <code>data/ArchiveBox.conf</code> or run:</i>
<br/><br/>
<code>archivebox config --set {key}="{
val.strip("'")
if (val := find_config_default(key)) else
(repr(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
(str(FLAT_CONFIG[key] if key_is_safe(key) else '********')).strip("'")
}"</code>
</p>
'''),
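The find_config_default() change above duck-types between Pydantic configsets (which expose model_fields) and plain mappings like CONSTANTS; a condensed sketch of that lookup:

def lookup_default(config, key):
    # Pydantic configsets expose model_fields; plain mappings just iterate as dicts
    fields = getattr(config, 'model_fields', dict(config))
    field = fields[key]
    # FieldInfo objects carry a .default; raw mapping values are the default themselves
    return field.default if hasattr(field, 'default') else field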

View file

@@ -1,4 +1,5 @@
__package__ = 'archivebox.crawls'
__order__ = 100
import abx

View file

@@ -18,6 +18,6 @@ class CrawlActor(ActorType[Crawl]):
FINAL_STATES: ClassVar[list[State]] = CrawlMachine.final_states
STATE_FIELD_NAME: ClassVar[str] = Crawl.state_field_name
MAX_CONCURRENT_ACTORS: ClassVar[int] = 3
MAX_CONCURRENT_ACTORS: ClassVar[int] = 1
MAX_TICK_TIME: ClassVar[int] = 10
CLAIM_FROM_TOP_N: ClassVar[int] = MAX_CONCURRENT_ACTORS * 10

View file

@@ -150,8 +150,8 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
parser = (self.seed and self.seed.extractor) or 'auto'
created_at = self.created_at.strftime("%Y-%m-%d %H:%M") if self.created_at else '<no timestamp set>'
if self.id and self.seed:
return f'[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
return f'[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
return f'\\[{self.ABID}] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
return f'\\[{self.abid_prefix}****not*saved*yet****] {url[:64]} ({parser}) @ {created_at} ({self.label or "Untitled Crawl"})'
@classmethod
def from_seed(cls, seed: Seed, max_depth: int=0, persona: str='Default', tags_str: str='', config: dict|None=None, created_by: int|None=None):
@@ -184,26 +184,27 @@ class Crawl(ABIDModel, ModelWithHealthStats, ModelWithStateMachine):
return '/api/v1/docs#/Core%20Models/api_v1_core_get_crawl'
def pending_snapshots(self) -> QuerySet['Snapshot']:
from core.models import Snapshot
return self.snapshot_set.exclude(status__in=Snapshot.FINAL_OR_ACTIVE_STATES)
return self.snapshot_set.filter(retry_at__isnull=False)
def pending_archiveresults(self) -> QuerySet['ArchiveResult']:
from core.models import ArchiveResult
snapshot_ids = self.snapshot_set.values_list('id', flat=True)
pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids).exclude(status__in=ArchiveResult.FINAL_OR_ACTIVE_STATES)
pending_archiveresults = ArchiveResult.objects.filter(snapshot_id__in=snapshot_ids, retry_at__isnull=True)
return pending_archiveresults
def create_root_snapshot(self) -> 'Snapshot':
from core.models import Snapshot
root_snapshot, _ = Snapshot.objects.get_or_create(
crawl=self,
root_snapshot, _ = Snapshot.objects.update_or_create(
url=self.seed.uri,
status=Snapshot.INITIAL_STATE,
retry_at=timezone.now(),
timestamp=str(timezone.now().timestamp()),
# config=self.seed.config,
defaults={
'crawl': self,
'status': Snapshot.INITIAL_STATE,
'retry_at': timezone.now(),
'timestamp': str(timezone.now().timestamp()),
# 'config': self.seed.config,
},
)
return root_snapshot

View file

@@ -1,5 +1,7 @@
__package__ = 'archivebox.crawls'
from django.utils import timezone
from statemachine import State, StateMachine
from crawls.models import Crawl
@@ -31,7 +33,7 @@ class CrawlMachine(StateMachine, strict_states=True):
super().__init__(crawl, *args, **kwargs)
def can_start(self) -> bool:
return self.crawl.seed and self.crawl.seed.uri
return bool(self.crawl.seed and self.crawl.seed.uri and (self.crawl.retry_at < timezone.now()))
def is_finished(self) -> bool:
if not self.crawl.snapshot_set.exists():
@@ -47,15 +49,17 @@ class CrawlMachine(StateMachine, strict_states=True):
# return "before_transition_return"
@started.enter
def on_started(self):
def enter_started(self):
print(f'CrawlMachine[{self.crawl.ABID}].on_started(): crawl.create_root_snapshot() + crawl.bump_retry_at(+10s)')
self.crawl.create_root_snapshot()
self.crawl.status = Crawl.StatusChoices.STARTED
self.crawl.bump_retry_at(seconds=10)
self.crawl.save()
self.crawl.create_root_snapshot()
@sealed.enter
def on_sealed(self):
def enter_sealed(self):
print(f'CrawlMachine[{self.crawl.ABID}].on_sealed(): crawl.retry_at=None')
self.crawl.status = Crawl.StatusChoices.SEALED
self.crawl.retry_at = None
self.crawl.save()
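With all three state machines now gating can_start() on retry_at, an actor can claim due work with a single timestamp query. A hedged sketch of such a claim (illustrative; the real claim logic lives in the actors package, not in this diff):

from django.utils import timezone

def claim_next(queryset):
    # the next object whose retry_at has come due, oldest first
    return (queryset
            .filter(retry_at__lte=timezone.now())
            .order_by('retry_at')
            .first())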

View file

@@ -11,7 +11,7 @@ from django.utils.functional import cached_property
import abx
import archivebox
from pydantic_pkgr import Binary, BinProvider
from abx_pkg import Binary, BinProvider
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
from .detect import get_host_guid, get_os_info, get_vm_info, get_host_network, get_host_stats
@@ -323,7 +323,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
# whereas a loaded binary is a not-yet saved instance that may not have the same config
# why would we want to load a binary record from the db when it could be freshly loaded?
def load_from_db(self) -> Binary:
# TODO: implement defaults arg in pydantic_pkgr
# TODO: implement defaults arg in abx_pkg
# return self.BINARY.load(defaults={
# 'binprovider': self.BINPROVIDER,
# 'abspath': Path(self.abspath),

View file

@@ -14,7 +14,7 @@ from crontab import CronTab, CronSlices
from django.db.models import QuerySet
from django.utils import timezone
from pydantic_pkgr import Binary
from abx_pkg import Binary
import abx
import archivebox

View file

@@ -6,7 +6,7 @@ PKGS_DIR = Path(__file__).parent
VENDORED_PKGS = [
'abx',
# 'pydantic-pkgr',
# 'abx-pkg',
# ... everything else in archivebox/pkgs/* comes after ...
]

View file

@@ -1,3 +1,4 @@
__package__ = 'abx_plugin_chrome'
__label__ = 'Chrome'
__author__ = 'ArchiveBox'
@@ -25,10 +26,11 @@ def ready():
CHROME_CONFIG.validate()
# @abx.hookimpl
# def get_EXTRACTORS():
# return {
# 'pdf': PDF_EXTRACTOR,
# 'screenshot': SCREENSHOT_EXTRACTOR,
# 'dom': DOM_EXTRACTOR,
# }
@abx.hookimpl
def get_EXTRACTORS():
from .extractors import PDF_EXTRACTOR, SCREENSHOT_EXTRACTOR, DOM_EXTRACTOR
return {
'pdf': PDF_EXTRACTOR,
'screenshot': SCREENSHOT_EXTRACTOR,
'dom': DOM_EXTRACTOR,
}
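On the consumer side (see ArchiveResult.extractor_module earlier in this commit), results from every plugin's get_EXTRACTORS() hook are merged into one mapping; a hedged lookup sketch (helper name illustrative):

import abx

def get_extractor(name: str):
    # merge every plugin's get_EXTRACTORS() result and look one up by name
    extractors = abx.as_dict(abx.pm.hook.get_EXTRACTORS())
    return extractors.get(name)  # e.g. 'pdf', 'screenshot', 'dom'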

View file

@@ -4,7 +4,7 @@ from pathlib import Path
from typing import List, Optional
from pydantic import InstanceOf
from pydantic_pkgr import (
from abx_pkg import (
Binary,
BinProvider,
BinName,

View file

@@ -3,7 +3,7 @@ from pathlib import Path
from typing import List, Optional
from pydantic import Field
from pydantic_pkgr import bin_abspath
from abx_pkg import bin_abspath
from abx_spec_config.base_configset import BaseConfigSet
from abx_plugin_default_binproviders import env

View file

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-abx-pkg>=0.1.0",
]
[build-system]

View file

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_curl'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary
from abx_pkg import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env

View file

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-abx-pkg>=0.1.0",
]
[build-system]

View file

@@ -3,7 +3,7 @@ import abx
from typing import Dict
from pydantic_pkgr import (
from abx_pkg import (
AptProvider,
BrewProvider,
EnvProvider,

View file

@@ -6,8 +6,8 @@ readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-pkg>=0.5.4",
"abx-spec-abx-pkg>=0.1.0",
]
[build-system]

View file

@@ -20,10 +20,10 @@ def get_CONFIG():
}
# @abx.hookimpl
# def get_EXTRACTORS():
# from .extractors import FAVICON_EXTRACTOR
@abx.hookimpl
def get_EXTRACTORS():
from .extractors import FAVICON_EXTRACTOR
# return {
# 'favicon': FAVICON_EXTRACTOR,
# }
return {
'favicon': FAVICON_EXTRACTOR,
}

View file

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_git'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary
from abx_pkg import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env

View file

@@ -1,15 +1,20 @@
__package__ = 'abx_plugin_git'
# from pathlib import Path
# from .binaries import GIT_BINARY
from pathlib import Path
# class GitExtractor(BaseExtractor):
# name: ExtractorName = 'git'
# binary: str = GIT_BINARY.name
from abx_pkg import BinName
# def get_output_path(self, snapshot) -> Path | None:
# return snapshot.as_link() / 'git'
from abx_spec_extractor import BaseExtractor, ExtractorName
# GIT_EXTRACTOR = GitExtractor()
from .binaries import GIT_BINARY
class GitExtractor(BaseExtractor):
name: ExtractorName = 'git'
binary: BinName = GIT_BINARY.name
def get_output_path(self, snapshot) -> Path | None:
return snapshot.as_link() / 'git'
GIT_EXTRACTOR = GitExtractor()

View file

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-abx-pkg>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24",
]

View file

@@ -6,7 +6,7 @@ from typing import List
from pathlib import Path
from pydantic import InstanceOf
from pydantic_pkgr import BinaryOverrides, SemVer, Binary, BinProvider
from abx_pkg import BinaryOverrides, SemVer, Binary, BinProvider
from abx_plugin_default_binproviders import apt
from abx_plugin_pip.binproviders import SYS_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, LIB_PIP_BINPROVIDER, VENV_SITE_PACKAGES, LIB_SITE_PACKAGES, USER_SITE_PACKAGES, SYS_SITE_PACKAGES

View file

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_mercury'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
from abx_pkg import BinProvider, BinName, BinaryOverrides, bin_abspath, Binary
from abx_plugin_default_binproviders import env

View file

@@ -1,17 +1,20 @@
__package__ = 'abx_plugin_mercury'
# from pathlib import Path
from pathlib import Path
# from .binaries import MERCURY_BINARY
from abx_pkg import BinName
from abx_spec_extractor import BaseExtractor, ExtractorName
from .binaries import MERCURY_BINARY
# class MercuryExtractor(BaseExtractor):
# name: ExtractorName = 'mercury'
# binary: str = MERCURY_BINARY.name
class MercuryExtractor(BaseExtractor):
name: ExtractorName = 'mercury'
binary: BinName = MERCURY_BINARY.name
# def get_output_path(self, snapshot) -> Path | None:
# return snapshot.link_dir / 'mercury' / 'content.html'
def get_output_path(self, snapshot) -> Path | None:
return snapshot.link_dir / 'mercury' / 'content.html'
# MERCURY_EXTRACTOR = MercuryExtractor()
MERCURY_EXTRACTOR = MercuryExtractor()

View file

@@ -1,4 +1,4 @@
__package__ = 'plugins_pkg.npm'
__package__ = 'abx_plugin_npm'
from typing import List
@@ -6,7 +6,7 @@ from typing import List
from pydantic import InstanceOf
from benedict import benedict
from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides
from abx_pkg import BinProvider, Binary, BinName, BinaryOverrides
from abx_plugin_default_binproviders import get_BINPROVIDERS

View file

@@ -2,7 +2,7 @@ import os
from pathlib import Path
from typing import Optional
from pydantic_pkgr import NpmProvider, PATHStr, BinProviderName
from abx_pkg import NpmProvider, PATHStr, BinProviderName
import abx

View file

@@ -6,8 +6,8 @@ readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-pkg>=0.5.4",
"abx-spec-abx-pkg>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24",
]

View file

@@ -1,5 +1,6 @@
__package__ = 'abx_plugin_pip'
__label__ = 'PIP'
__order__ = 200
import abx

View file

@@ -9,7 +9,7 @@ from pydantic import InstanceOf, Field, model_validator
import django
import django.db.backends.sqlite3.base
from django.db.backends.sqlite3.base import Database as django_sqlite3 # type: ignore[import-type]
from pydantic_pkgr import BinProvider, Binary, BinName, BinaryOverrides, SemVer
from abx_pkg import BinProvider, Binary, BinName, BinaryOverrides, SemVer
from .binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER, env, apt, brew

View file

@@ -6,7 +6,7 @@ from typing import Optional
from benedict import benedict
from pydantic_pkgr import PipProvider, BinName, BinProviderName
from abx_pkg import PipProvider, BinName, BinProviderName
import abx

View file

@@ -6,9 +6,9 @@ readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-pkg>=0.5.4",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-abx-pkg>=0.1.0",
"abx-plugin-default-binproviders>=2024.10.24",
"django>=5.0.0",
]

View file

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_playwright'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinName, BinProvider, Binary
from abx_pkg import BinName, BinProvider, Binary
from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER

View file

@@ -7,7 +7,7 @@ from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from pydantic import Field
from pydantic_pkgr import (
from abx_pkg import (
BinName,
BinProvider,
BinProviderName,

View file

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic>=2.4.2",
"pydantic-pkgr>=0.5.4",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-pkg>=0.5.4",
"abx-spec-abx-pkg>=0.1.0",
"abx-spec-config>=0.1.0",
]

View file

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_puppeteer'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary
from abx_pkg import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import env

View file

@@ -4,7 +4,7 @@ from pathlib import Path
from typing import List, Optional, Dict, ClassVar
from pydantic import Field
from pydantic_pkgr import (
from abx_pkg import (
BinProvider,
BinName,
BinProviderName,

View file

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-abx-pkg>=0.1.0",
"abx-pkg>=0.5.4",
]
[build-system]

View file

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_readability'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName
from abx_pkg import Binary, BinProvider, BinaryOverrides, BinName
from abx_plugin_default_binproviders import env
from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER

View file

@@ -1,19 +1,19 @@
# __package__ = 'abx_plugin_readability'
# from pathlib import Path
from pathlib import Path
# from pydantic_pkgr import BinName
from abx_pkg import BinName
from abx_spec_extractor import BaseExtractor, ExtractorName
from .binaries import READABILITY_BINARY
# from .binaries import READABILITY_BINARY
class ReadabilityExtractor(BaseExtractor):
name: ExtractorName = 'readability'
binary: BinName = READABILITY_BINARY.name
def get_output_path(self, snapshot) -> Path:
return Path(snapshot.link_dir) / 'readability' / 'content.html'
# class ReadabilityExtractor(BaseExtractor):
# name: str = 'readability'
# binary: BinName = READABILITY_BINARY.name
# def get_output_path(self, snapshot) -> Path:
# return Path(snapshot.link_dir) / 'readability' / 'content.html'
# READABILITY_EXTRACTOR = ReadabilityExtractor()
READABILITY_EXTRACTOR = ReadabilityExtractor()

View file

@@ -3,7 +3,7 @@ __id__ = 'abx_plugin_readwise_extractor'
__label__ = 'Readwise API'
__version__ = '2024.10.27'
__author__ = 'ArchiveBox'
__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/plugins_extractor/readwise'
__homepage__ = 'https://github.com/ArchiveBox/ArchiveBox/tree/dev/archivebox/pkgs/abx-plugin-readwise-extractor'
__dependencies__ = []
import abx

View file

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_ripgrep_search'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
from abx_pkg import BinProvider, BinaryOverrides, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env

View file

@@ -1,7 +1,7 @@
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
from abx_pkg import Binary, BinProvider, BinaryOverrides, BinName, bin_abspath
from abx_plugin_default_binproviders import env
from abx_plugin_npm.binproviders import SYS_NPM_BINPROVIDER, LIB_NPM_BINPROVIDER

View file

@@ -1,18 +1,21 @@
__package__ = 'abx_plugin_singlefile'
# from pathlib import Path
# from pydantic_pkgr import BinName
from pathlib import Path
# from .binaries import SINGLEFILE_BINARY
from abx_pkg import BinName
from abx_spec_extractor import BaseExtractor, ExtractorName
from .binaries import SINGLEFILE_BINARY
# class SinglefileExtractor(BaseExtractor):
# name: str = 'singlefile'
# binary: BinName = SINGLEFILE_BINARY.name
class SinglefileExtractor(BaseExtractor):
name: ExtractorName = 'singlefile'
binary: BinName = SINGLEFILE_BINARY.name
# def get_output_path(self, snapshot) -> Path:
# return Path(snapshot.link_dir) / 'singlefile.html'
def get_output_path(self, snapshot) -> Path:
return Path(snapshot.link_dir) / 'singlefile.html'
# SINGLEFILE_EXTRACTOR = SinglefileExtractor()
SINGLEFILE_EXTRACTOR = SinglefileExtractor()

View file

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-abx-pkg>=0.1.0",
"abx-pkg>=0.5.4",
]
[build-system]

View file

@@ -3,7 +3,7 @@ __package__ = 'abx_plugin_sonic_search'
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinaryOverrides, BinName, Binary
from abx_pkg import BinProvider, BinaryOverrides, BinName, Binary
from abx_plugin_default_binproviders import brew, env

View file

@@ -1,4 +1,4 @@
__package__ = 'plugins_search.sonic'
__package__ = 'abx_plugin_sonic_search'
from typing import List, Generator, cast

View file

@@ -7,9 +7,9 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-abx-pkg>=0.1.0",
"abx-spec-searchbackend>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-pkg>=0.5.4",
]
[build-system]

View file

@@ -7,3 +7,11 @@ import abx
# return {
# 'title_extractor': TITLE_EXTRACTOR_CONFIG
# }
@abx.hookimpl
def get_EXTRACTORS():
from .extractors import TITLE_EXTRACTOR
return {
'title': TITLE_EXTRACTOR,
}

View file

@@ -4,7 +4,7 @@ from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, Binary
from abx_pkg import BinProvider, BinName, Binary
from abx_plugin_default_binproviders import apt, brew, env

View file

@@ -1,35 +1,37 @@
__package__ = 'abx_plugin_wget'
# from pathlib import Path
from pathlib import Path
# from pydantic_pkgr import BinName
from abx_pkg import BinName
# from .binaries import WGET_BINARY
# from .wget_util import wget_output_path
from abx_spec_extractor import BaseExtractor, ExtractorName
# class WgetExtractor(BaseExtractor):
# name: ExtractorName = 'wget'
# binary: BinName = WGET_BINARY.name
from .binaries import WGET_BINARY
from .wget_util import wget_output_path
# def get_output_path(self, snapshot) -> Path | None:
# wget_index_path = wget_output_path(snapshot.as_link())
# if wget_index_path:
# return Path(wget_index_path)
# return None
class WgetExtractor(BaseExtractor):
name: ExtractorName = 'wget'
binary: BinName = WGET_BINARY.name
# WGET_EXTRACTOR = WgetExtractor()
def get_output_path(self, snapshot) -> Path | None:
wget_index_path = wget_output_path(snapshot.as_link())
if wget_index_path:
return Path(wget_index_path)
return None
WGET_EXTRACTOR = WgetExtractor()
# class WarcExtractor(BaseExtractor):
# name: ExtractorName = 'warc'
# binary: BinName = WGET_BINARY.name
class WarcExtractor(BaseExtractor):
name: ExtractorName = 'warc'
binary: BinName = WGET_BINARY.name
# def get_output_path(self, snapshot) -> Path | None:
# warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
# if warc_files:
# return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
# return None
def get_output_path(self, snapshot) -> Path | None:
warc_files = list((Path(snapshot.link_dir) / 'warc').glob('*.warc.gz'))
if warc_files:
return sorted(warc_files, key=lambda x: x.stat().st_size, reverse=True)[0]
return None
# WARC_EXTRACTOR = WarcExtractor()
WARC_EXTRACTOR = WarcExtractor()
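A hedged usage sketch of the restored extractors above (class and instance names from the hunk; the helper is illustrative):

from abx_plugin_wget.extractors import WGET_EXTRACTOR, WARC_EXTRACTOR

def wget_outputs_exist(snapshot) -> bool:
    # get_output_path() may return None when nothing has been saved yet
    wget_out = WGET_EXTRACTOR.get_output_path(snapshot)
    warc_out = WARC_EXTRACTOR.get_output_path(snapshot)
    return any(p is not None and p.exists() for p in (wget_out, warc_out))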

View file

@@ -7,7 +7,7 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-abx-pkg>=0.1.0",
]
[build-system]

View file

@@ -4,7 +4,7 @@ import subprocess
from typing import List
from pydantic import InstanceOf
from pydantic_pkgr import BinProvider, BinName, BinaryOverrides, Binary
from abx_pkg import BinProvider, BinName, BinaryOverrides, Binary
from abx_plugin_default_binproviders import apt, brew, env
from abx_plugin_pip.binproviders import LIB_PIP_BINPROVIDER, VENV_PIP_BINPROVIDER, SYS_PIP_BINPROVIDER

View file

@@ -1,4 +1,4 @@
__package__ = 'plugins_extractor.ytdlp'
__package__ = 'abx_plugin_ytdlp'
from typing import List

View file

@@ -7,8 +7,8 @@ requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"abx-spec-config>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"pydantic-pkgr>=0.5.4",
"abx-spec-abx-pkg>=0.1.0",
"abx-pkg>=0.5.4",
]
[build-system]

View file

@@ -11,13 +11,13 @@ from typing import cast
import abx
from abx_spec_config import ConfigPluginSpec
from abx_spec_pydantic_pkgr import PydanticPkgrPluginSpec
from abx_spec_abx_pkg import AbxPkgPluginSpec
from abx_spec_django import DjangoPluginSpec
from abx_spec_searchbackend import SearchBackendPluginSpec
class ArchiveBoxPluginSpec(ConfigPluginSpec, PydanticPkgrPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
class ArchiveBoxPluginSpec(ConfigPluginSpec, AbxPkgPluginSpec, DjangoPluginSpec, SearchBackendPluginSpec):
"""
ArchiveBox plugins can use any of the hooks from the Config, PydanticPkgr, and Django plugin specs.
ArchiveBox plugins can use any of the hooks from the Config, AbxPkg, and Django plugin specs.
"""
pass

View file

@@ -2,10 +2,12 @@ __order__ = 100
import os
from pathlib import Path
from typing import Dict, Any, cast
from typing import Any, cast, TYPE_CHECKING
from benedict import benedict
if TYPE_CHECKING:
from archivebox.config.constants import ConstantsDict
import abx
@@ -13,38 +15,43 @@ from .base_configset import BaseConfigSet, ConfigKeyStr
class ConfigPluginSpec:
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_collection_config_path(self) -> Path:
def get_collection_config_path() -> Path:
return Path(os.getcwd()) / "ArchiveBox.conf"
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_system_config_path(self) -> Path:
def get_system_config_path() -> Path:
return Path('~/.config/abx/abx.conf').expanduser()
@staticmethod
@abx.hookspec
@abx.hookimpl
def get_CONFIG(self) -> Dict[abx.PluginId, BaseConfigSet]:
def get_CONFIG() -> dict[abx.PluginId, 'BaseConfigSet | ConstantsDict']:
from archivebox import CONSTANTS
"""Get the config for a single plugin -> {plugin_id: PluginConfigSet()}"""
return {
# override this in your plugin to return your plugin's config, e.g.
# 'ytdlp': YtdlpConfig(...),
'CONSTANTS': CONSTANTS,
}
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_CONFIGS(self) -> Dict[abx.PluginId, BaseConfigSet]:
def get_CONFIGS() -> dict[abx.PluginId, BaseConfigSet]:
"""Get the config for all plugins by plugin_id -> {plugin_abc: PluginABCConfigSet(), plugin_xyz: PluginXYZConfigSet(), ...}"""
return abx.as_dict(pm.hook.get_CONFIG())
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_FLAT_CONFIG(self) -> Dict[ConfigKeyStr, Any]:
def get_FLAT_CONFIG() -> dict[ConfigKeyStr, Any]:
"""Get the flat config assembled from all plugins config -> {SOME_KEY: 'someval', 'OTHER_KEY': 'otherval', ...}"""
return benedict({
key: value
@@ -52,9 +59,10 @@ class ConfigPluginSpec:
for key, value in benedict(configset).items()
})
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_SCOPE_CONFIG(self, extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> Dict[ConfigKeyStr, Any]:
def get_SCOPE_CONFIG(extra=None, archiveresult=None, snapshot=None, crawl=None, user=None, collection=..., environment=..., machine=..., default=...) -> dict[ConfigKeyStr, Any]:
"""Get the config as it applies to you right now, based on the current context"""
return benedict({
**pm.hook.get_default_config(default=default),
@@ -69,35 +77,41 @@ class ConfigPluginSpec:
**(extra or {}),
})
@staticmethod
# @abx.hookspec(firstresult=True)
# @abx.hookimpl
# def get_request_config(self, request) -> dict:
# def get_request_config(request) -> dict:
# session = getattr(request, 'session', None)
# return getattr(session, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_archiveresult_config(self, archiveresult) -> Dict[ConfigKeyStr, Any]:
def get_archiveresult_config(archiveresult) -> dict[ConfigKeyStr, Any]:
return getattr(archiveresult, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_snapshot_config(self, snapshot) -> Dict[ConfigKeyStr, Any]:
def get_snapshot_config(snapshot) -> dict[ConfigKeyStr, Any]:
return getattr(snapshot, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_crawl_config(self, crawl) -> Dict[ConfigKeyStr, Any]:
def get_crawl_config(crawl) -> dict[ConfigKeyStr, Any]:
return getattr(crawl, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_user_config(self, user=None) -> Dict[ConfigKeyStr, Any]:
def get_user_config(user=None) -> dict[ConfigKeyStr, Any]:
return getattr(user, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_collection_config(self, collection=...) -> Dict[ConfigKeyStr, Any]:
def get_collection_config(collection=...) -> dict[ConfigKeyStr, Any]:
# ... = ellipsis, means automatically get the collection config from the active data/ArchiveBox.conf file
# {} = empty dict, override to ignore the collection config
return benedict({
@@ -106,9 +120,10 @@ class ConfigPluginSpec:
for key, value in configset.from_collection().items()
}) if collection == ... else collection
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_environment_config(self, environment=...) -> Dict[ConfigKeyStr, Any]:
def get_environment_config(environment=...) -> dict[ConfigKeyStr, Any]:
# ... = ellipsis, means automatically get the environment config from the active environment variables
# {} = empty dict, override to ignore the environment config
return benedict({
@@ -117,18 +132,20 @@ class ConfigPluginSpec:
for key, value in configset.from_environment().items()
}) if environment == ... else environment
@staticmethod
# @abx.hookspec(firstresult=True)
# @abx.hookimpl
# def get_machine_config(self, machine=...) -> dict:
# def get_machine_config(machine=...) -> dict:
# # ... = ellipsis, means automatically get the machine config from the currently executing machine
# # {} = empty dict, override to ignore the machine config
# if machine == ...:
# machine = Machine.objects.get_current()
# return getattr(machine, 'config', None) or {}
@staticmethod
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_default_config(self, default=...) -> Dict[ConfigKeyStr, Any]:
def get_default_config(default=...) -> dict[ConfigKeyStr, Any]:
# ... = ellipsis, means automatically get the machine config from the currently executing machine
# {} = empty dict, override to ignore the machine config
return benedict({

View file

@@ -1,4 +1,3 @@
__order__ = 300
import abx
from typing import List, Dict, Any, cast
@@ -6,6 +5,8 @@ from typing import List, Dict, Any, cast
###########################################################################################
class DjangoPluginSpec:
__order__ = 10
@abx.hookspec
def get_INSTALLED_APPS() -> List[str]:
return ['abx_spec_django']

View file

@@ -1,10 +1,12 @@
__order__ = 10
import os
from typing import Optional, List, Annotated, Tuple
from pathlib import Path
from pydantic import AfterValidator
from pydantic_pkgr import BinName
from abx_pkg import BinName
import abx
@@ -23,7 +25,7 @@ CmdArgsList = Annotated[List[str] | Tuple[str, ...], AfterValidator(assert_no_em
@abx.hookspec
@abx.hookimpl
def get_EXTRACTORS():
return []
return {}
@abx.hookspec
@abx.hookimpl

View file

@@ -1,114 +0,0 @@
__order__ = 200
import os
from typing import Dict, cast
from pathlib import Path
from pydantic_pkgr import Binary, BinProvider
import abx
from abx_spec_config import ConfigPluginSpec
###########################################################################################
class PydanticPkgrPluginSpec:
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_LIB_DIR(self) -> Path:
"""Get the directory where shared runtime libraries/dependencies should be installed"""
FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
LIB_DIR = Path(FLAT_CONFIG.get('LIB_DIR', '/usr/local/share/abx'))
return LIB_DIR
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_BIN_DIR(self) -> Path:
"""Get the directory where binaries should be symlinked to"""
FLAT_CONFIG = pm.hook.get_FLAT_CONFIG()
LIB_DIR = pm.hook.get_LIB_DIR()
BIN_DIR = Path(FLAT_CONFIG.get('BIN_DIR') or LIB_DIR / 'bin')
return BIN_DIR
@abx.hookspec
@abx.hookimpl
def get_BINPROVIDERS(self) -> Dict[str, BinProvider]:
return {
# to be implemented by plugins, e.g.:
# 'npm': NpmBinProvider(npm_prefix=Path('/usr/local/share/abx/npm')),
}
@abx.hookspec
@abx.hookimpl
def get_BINARIES(self) -> Dict[str, Binary]:
return {
# to be implemented by plugins, e.g.:
# 'yt-dlp': Binary(name='yt-dlp', binproviders=[npm]),
}
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_BINPROVIDER(self, binprovider_name: str) -> BinProvider:
"""Get a specific BinProvider by name"""
return abx.as_dict(pm.hook.get_BINPROVIDERS())[binprovider_name]
@abx.hookspec(firstresult=True)
@abx.hookimpl
def get_BINARY(self, bin_name: str) -> Binary:
"""Get a specific Binary by name"""
return abx.as_dict(pm.hook.get_BINARIES())[bin_name]
@abx.hookspec(firstresult=True)
@abx.hookimpl
def binary_load(self, binary: Binary, **kwargs) -> Binary:
"""Load a binary from the filesystem (override to load a binary from a different source, e.g. DB, cache, etc.)"""
loaded_binary = binary.load(**kwargs)
pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
return loaded_binary
@abx.hookspec(firstresult=True)
@abx.hookimpl
def binary_install(self, binary: Binary, **kwargs) -> Binary:
"""Override to change how a binary is installed (e.g. by downloading from a remote source, etc.)"""
loaded_binary = binary.install(**kwargs)
pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
return loaded_binary
@abx.hookspec(firstresult=True)
@abx.hookimpl
def binary_load_or_install(self, binary: Binary, **kwargs) -> Binary:
"""Override to change how a binary is loaded or installed (e.g. by downloading from a remote source, etc.)"""
loaded_binary = binary.load_or_install(**kwargs)
pm.hook.binary_symlink_to_bin_dir(binary=loaded_binary)
return loaded_binary
@abx.hookspec(firstresult=True)
@abx.hookimpl
def binary_symlink_to_bin_dir(self, binary: Binary, bin_dir: Path | None=None):
if not (binary.abspath and os.path.isfile(binary.abspath)):
return
BIN_DIR = pm.hook.get_BIN_DIR()
try:
BIN_DIR.mkdir(parents=True, exist_ok=True)
symlink = BIN_DIR / binary.name
symlink.unlink(missing_ok=True)
symlink.symlink_to(binary.abspath)
symlink.chmod(0o777) # make sure its executable by everyone
except Exception:
# print(f'[red]:warning: Failed to symlink {symlink} -> {binary.abspath}[/red] {err}')
# not actually needed, we can just run without it
pass
PLUGIN_SPEC = PydanticPkgrPluginSpec
class RequiredSpecsAvailable(ConfigPluginSpec, PydanticPkgrPluginSpec):
pass
TypedPluginManager = abx.ABXPluginManager[RequiredSpecsAvailable]
pm = cast(TypedPluginManager, abx.pm)

View file

@@ -1,17 +0,0 @@
[project]
name = "abx-spec-pydantic-pkgr"
version = "0.1.0"
description = "The ABX plugin specification for Binaries and BinProviders"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"abx>=0.1.0",
"pydantic-pkgr>=0.5.4",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project.entry-points.abx]
abx_spec_pydantic_pkgr = "abx_spec_pydantic_pkgr"

View file

@@ -25,6 +25,9 @@ class BaseSearchBackend(abc.ABC):
class SearchBackendPluginSpec:
__order__ = 10
@staticmethod
@abx.hookspec
@abx.hookimpl
def get_SEARCHBACKENDS() -> Dict[abx.PluginId, BaseSearchBackend]:
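This @staticmethod + @abx.hookspec + @abx.hookimpl stack recurs across every spec file touched by this commit: the same function declares the hook contract and serves as its default implementation. A minimal sketch:

import abx

class ExampleSpec:
    @staticmethod
    @abx.hookspec            # declares the contract plugins may implement
    @abx.hookimpl            # ...and doubles as the default (fallback) implementation
    def get_SEARCHBACKENDS() -> dict:
        return {}            # plugins override this by returning their backends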

View file

@@ -244,10 +244,12 @@ def get_plugin_order(plugin: PluginId | Path | ModuleType | Type) -> Tuple[int,
except FileNotFoundError:
pass
default_order = 10 if '_spec_' in str(plugin_dir).lower() else 999
if plugin_module:
order = getattr(plugin_module, '__order__', 999)
order = getattr(plugin_module, '__order__', default_order)
else:
order = 999
order = default_order
assert order is not None
assert plugin_dir
@@ -270,7 +272,10 @@ def get_plugin(plugin: PluginId | ModuleType | Type) -> PluginInfo:
elif inspect.isclass(plugin):
module = inspect.getmodule(plugin)
else:
raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
plugin = type(plugin)
module = inspect.getmodule(plugin)
# raise ValueError(f'Invalid plugin, must be a module, class, or plugin ID (package name): {plugin}')
assert module
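
In practice this means passing an instance no longer raises; a hedged sketch (the class name is hypothetical, and a real plugin class would live in an abx_* package):

# Hypothetical illustration: instances now resolve via their class's module.
class ExamplePlugin:            # stand-in; real plugins live in abx_* packages
    pass

info = get_plugin(ExamplePlugin())   # previously raised ValueError for instances
# info['id'] / info['module'] are resolved from the class's defining package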
@ -416,9 +421,14 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
PLUGINS_TO_LOAD = []
LOADED_PLUGINS = {}
for plugin in plugins:
plugin_info = get_plugin(plugin)
assert plugin_info, f'No plugin metadata found for {plugin}'
plugin_infos = sorted([
get_plugin(plugin)
for plugin in plugins
], key=lambda plugin: plugin.get('order', 999))
for plugin_info in plugin_infos:
assert plugin_info, 'No plugin metadata found for plugin'
assert 'id' in plugin_info and 'module' in plugin_info
if plugin_info['module'] in pm.get_plugins():
LOADED_PLUGINS[plugin_info['id']] = plugin_info
@ -431,7 +441,7 @@ def load_plugins(plugins: Iterable[PluginId | ModuleType | Type] | Dict[PluginId
for plugin_info in PLUGINS_TO_LOAD:
pm.register(plugin_info['module'])
LOADED_PLUGINS[plugin_info['id']] = plugin_info
# print(f' √ Loaded plugin: {plugin_id}')
print(f' √ Loaded plugin: {plugin_info["id"]}')
return benedict(LOADED_PLUGINS)
@cache
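
Registration is now deterministic: all requested plugins are resolved first, then registered lowest-order first (sorted() is stable, so ties keep their input order). A hedged sketch with illustrative plugin IDs:

# Hypothetical call; plugin IDs are illustrative:
loaded = load_plugins(['abx_plugin_wget', 'abx_spec_config', 'abx_plugin_curl'])
# abx_spec_config (order ~10) registers before the two plugins (order ~999),
# and the plugins keep their relative input order, so roughly:
# list(loaded.keys()) -> ['abx_spec_config', 'abx_plugin_wget', 'abx_plugin_curl']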

View file

@ -1,5 +1,6 @@
__package__ = 'archivebox.seeds'
__order__ = 100
import abx

View file

@ -1,6 +1,6 @@
[project]
name = "archivebox"
version = "0.8.6rc1"
version = "0.8.6rc2"
requires-python = ">=3.10"
description = "Self-hosted internet archiving solution."
authors = [{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"}]
@ -80,13 +80,13 @@ dependencies = [
"django-taggit==6.1.0",
"base32-crockford==0.3.0",
"platformdirs>=4.3.6",
"pydantic-pkgr>=0.5.4",
"abx-pkg>=0.6.0",
"pocket>=0.3.6",
"sonic-client>=1.0.0",
"yt-dlp>=2024.8.6", # for: media"
############# Plugin Dependencies ################
"abx>=0.1.0",
"abx-spec-pydantic-pkgr>=0.1.0",
"abx-spec-abx-pkg>=0.1.1",
"abx-spec-config>=0.1.0",
"abx-spec-archivebox>=0.1.0",
"abx-spec-django>=0.1.0",
@ -178,10 +178,10 @@ dev-dependencies = [
]
[tool.uv.sources]
# pydantic-pkgr = { workspace = true }
# abx-pkg = { workspace = true }
abx = { workspace = true }
abx-spec-pydantic-pkgr = { workspace = true }
abx-spec-abx-pkg = { workspace = true }
abx-spec-config = { workspace = true }
abx-spec-archivebox = { workspace = true }
abx-spec-django = { workspace = true }

View file

@ -1,6 +1,6 @@
# This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml --all-extras -o requirements.txt
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx
# via
# archivebox (pyproject.toml)
# abx-plugin-archivedotorg
@ -24,65 +24,65 @@
# abx-plugin-title
# abx-plugin-wget
# abx-plugin-ytdlp
# abx-spec-abx-pkg
# abx-spec-archivebox
# abx-spec-config
# abx-spec-django
# abx-spec-extractor
# abx-spec-pydantic-pkgr
# abx-spec-searchbackend
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-archivedotorg
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-archivedotorg
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-chrome
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-chrome
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-curl
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-curl
# via
# archivebox (pyproject.toml)
# abx-plugin-archivedotorg
# abx-plugin-favicon
# abx-plugin-title
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-default-binproviders
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-default-binproviders
# via
# archivebox (pyproject.toml)
# abx-plugin-git
# abx-plugin-npm
# abx-plugin-pip
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-favicon
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-favicon
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-git
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-git
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-htmltotext
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-htmltotext
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ldap-auth
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ldap-auth
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-mercury
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-mercury
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-npm
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-npm
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-pip
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-pip
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-playwright
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-playwright
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-puppeteer
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-puppeteer
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-readability
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-readability
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ripgrep-search
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ripgrep-search
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-singlefile
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-singlefile
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sonic-search
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sonic-search
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-sqlitefts-search
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-sqlitefts-search
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-title
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-title
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-wget
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-wget
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-plugin-ytdlp
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-plugin-ytdlp
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-archivebox
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-archivebox
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-config
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-config
# via
# archivebox (pyproject.toml)
# abx-plugin-archivedotorg
@ -105,13 +105,13 @@
# abx-plugin-title
# abx-plugin-wget
# abx-plugin-ytdlp
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-django
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-django
# via
# archivebox (pyproject.toml)
# abx-plugin-ldap-auth
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-extractor
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-extractor
# via archivebox (pyproject.toml)
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-pydantic-pkgr
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-abx-pkg
# via
# archivebox (pyproject.toml)
# abx-plugin-chrome
@ -126,12 +126,24 @@
# abx-plugin-sonic-search
# abx-plugin-wget
# abx-plugin-ytdlp
-e file:///Users/squash/Local/Code/archiveboxes/archivebox7/archivebox/pkgs/abx-spec-searchbackend
-e file:///Volumes/NVME/Users/squash/Local/Code/archiveboxes/ArchiveBox7/archivebox/pkgs/abx-spec-searchbackend
# via
# archivebox (pyproject.toml)
# abx-plugin-ripgrep-search
# abx-plugin-sonic-search
# abx-plugin-sqlitefts-search
abx-pkg==0.6.0
# via
# archivebox (pyproject.toml)
# abx-plugin-default-binproviders
# abx-plugin-npm
# abx-plugin-pip
# abx-plugin-playwright
# abx-plugin-puppeteer
# abx-plugin-singlefile
# abx-plugin-sonic-search
# abx-plugin-ytdlp
# abx-spec-abx-pkg
annotated-types==0.7.0
# via pydantic
anyio==4.6.2.post1
@ -159,11 +171,9 @@ base32-crockford==0.3.0
# via archivebox (pyproject.toml)
beautifulsoup4==4.12.3
# via python-benedict
brotli==1.1.0
# via yt-dlp
bx-django-utils==81
bx-django-utils==82
# via django-huey-monitor
bx-py-utils==105
bx-py-utils==106
# via
# bx-django-utils
# django-huey-monitor
@ -172,10 +182,9 @@ certifi==2024.8.30
# httpcore
# httpx
# requests
# yt-dlp
cffi==1.17.1
# via cryptography
channels==4.1.0
channels==4.2.0
# via archivebox (pyproject.toml)
charset-normalizer==3.4.0
# via requests
@ -197,7 +206,7 @@ decorator==5.1.1
# via
# ipdb
# ipython
django==5.1.2
django==5.1.3
# via
# archivebox (pyproject.toml)
# abx
@ -270,7 +279,7 @@ ftfy==6.3.1
# via python-benedict
h11==0.14.0
# via httpcore
httpcore==1.0.6
httpcore==1.0.7
# via httpx
httpx==0.27.2
# via django-signal-webhooks
@ -297,7 +306,7 @@ ipython==8.29.0
# via
# archivebox (pyproject.toml)
# ipdb
jedi==0.19.1
jedi==0.19.2
# via ipython
libcst==1.5.0
# via django-autotyping
@ -309,8 +318,6 @@ matplotlib-inline==0.1.7
# via ipython
mdurl==0.1.2
# via markdown-it-py
mutagen==1.47.0
# via yt-dlp
mypy-extensions==1.0.0
# via archivebox (pyproject.toml)
openpyxl==3.1.5
@ -319,12 +326,14 @@ parso==0.8.4
# via jedi
pexpect==4.9.0
# via ipython
phonenumbers==8.13.48
phonenumbers==8.13.50
# via python-benedict
pip==24.3.1
# via abx-pkg
platformdirs==4.3.6
# via
# archivebox (pyproject.toml)
# pydantic-pkgr
# abx-pkg
pluggy==1.5.0
# via
# archivebox (pyproject.toml)
@ -352,34 +361,20 @@ pyasn1-modules==0.4.1
# service-identity
pycparser==2.22
# via cffi
pycryptodomex==3.21.0
# via yt-dlp
pydantic==2.9.2
# via
# abx-pkg
# abx-plugin-playwright
# abx-spec-config
# abx-spec-extractor
# abx-spec-searchbackend
# django-ninja
# django-pydantic-field
# pydantic-pkgr
# pydantic-settings
pydantic-core==2.23.4
# via
# abx-pkg
# pydantic
# pydantic-pkgr
pydantic-pkgr==0.5.4
# via
# archivebox (pyproject.toml)
# abx-plugin-default-binproviders
# abx-plugin-npm
# abx-plugin-pip
# abx-plugin-playwright
# abx-plugin-puppeteer
# abx-plugin-singlefile
# abx-plugin-sonic-search
# abx-plugin-ytdlp
# abx-spec-pydantic-pkgr
pydantic-settings==2.6.1
# via
# archivebox (pyproject.toml)
@ -414,6 +409,8 @@ python-ldap==3.4.4
# django-auth-ldap
python-slugify==8.0.4
# via python-benedict
python-statemachine==2.4.0
# via archivebox (pyproject.toml)
python-stdnum==1.20
# via bx-django-utils
pytz==2024.2
@ -424,14 +421,13 @@ pyyaml==6.0.2
# via
# libcst
# python-benedict
regex==2024.9.11
regex==2024.11.6
# via dateparser
requests==2.32.3
# via
# archivebox (pyproject.toml)
# pocket
# python-benedict
# yt-dlp
requests-tracker==0.3.3
# via archivebox (pyproject.toml)
rich==13.9.4
@ -443,7 +439,7 @@ rich-argparse==1.6.0
# via archivebox (pyproject.toml)
service-identity==24.2.0
# via twisted
setuptools==75.3.0
setuptools==75.5.0
# via
# archivebox (pyproject.toml)
# autobahn
@ -464,7 +460,7 @@ sonic-client==1.0.0
# via archivebox (pyproject.toml)
soupsieve==2.6
# via beautifulsoup4
sqlparse==0.5.1
sqlparse==0.5.2
# via
# django
# django-debug-toolbar
@ -492,12 +488,12 @@ types-pyyaml==6.0.12.20240917
typing-extensions==4.12.2
# via
# archivebox (pyproject.toml)
# abx-pkg
# django-pydantic-field
# django-stubs
# django-stubs-ext
# pydantic
# pydantic-core
# pydantic-pkgr
# twisted
tzdata==2024.2
# via archivebox (pyproject.toml)
@ -506,9 +502,7 @@ tzlocal==5.2
ulid-py==1.1.0
# via archivebox (pyproject.toml)
urllib3==2.2.3
# via
# requests
# yt-dlp
# via requests
uuid6==2024.7.10
# via typeid-python
w3lib==2.2.1
@ -517,13 +511,11 @@ wcwidth==0.2.13
# via
# ftfy
# prompt-toolkit
websockets==13.1
# via yt-dlp
xlrd==2.0.1
# via python-benedict
xmltodict==0.14.2
# via python-benedict
yt-dlp==2024.10.22
yt-dlp==2024.11.4
# via archivebox (pyproject.toml)
zope-interface==7.1.1
# via twisted

856 uv.lock

File diff suppressed because it is too large