split archivebox.use into archivebox.reads and archivebox.writes

Nick Sweeting 2024-10-15 01:03:01 -07:00
parent aaf069fab0
commit 80d8a6b667
No known key found for this signature in database
8 changed files with 138 additions and 131 deletions
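Note: in practice this commit renames the read-side plugin API, so every call site that previously imported abx.archivebox.use now imports abx.archivebox.reads. A minimal before/after sketch (the write-side counterpart, abx.archivebox.writes, is part of the split per the commit title, but its contents are not visible in the hunks below):

# before this commit: read-side lookups lived in abx.archivebox.use
import abx.archivebox.use
binaries = abx.archivebox.use.get_BINARIES()

# after this commit: the same lookups move to abx.archivebox.reads
import abx.archivebox.reads
binaries = abx.archivebox.reads.get_BINARIES()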

View file

@@ -195,8 +195,8 @@ class BaseExtractor:
@cached_property
def BINARY(self) -> BaseBinary:
import abx.archivebox.use
for binary in abx.archivebox.use.get_BINARIES().values():
import abx.archivebox.reads
for binary in abx.archivebox.reads.get_BINARIES().values():
if binary.name == self.binary:
return binary
raise ValueError(f'Binary {self.binary} not found')

View file

@@ -1,10 +1,11 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, Any, TYPE_CHECKING
from typing import Dict, Set, Any, TYPE_CHECKING
from benedict import benedict
import abx
from .. import pm
if TYPE_CHECKING:
@@ -24,52 +25,37 @@ def get_PLUGINS() -> Dict[str, Dict[str, Any]]:
for plugin_dict in pm.hook.get_PLUGIN()
for plugin_id, plugin in plugin_dict.items()
})
def get_PLUGIN(plugin_id: str):
plugin_info = get_PLUGINS().get(plugin_id, {})
assert plugin_info and getattr(plugin_info, 'PACKAGE', None), f'Plugin {plugin_id} not found'
module = importlib.import_module(plugin_info['PACKAGE'])
extra_info ={
'ID': plugin_id,
'id': plugin_id,
**plugin_info,
'SOURCE_PATH': module.__file__,
'MODULE': module,
'CONFIG': {},
'BINARIES': {},
'BINPROVIDERS': {},
'EXTRACTORS': {},
'SEARCHBACKENDS': {},
}
try:
extra_info['CONFIG'] = module.get_CONFIG()[plugin_id]
except AttributeError:
pass
try:
extra_info['BINARIES'] = module.get_BINARIES()
except AttributeError:
pass
try:
extra_info['BINPROVIDERS'] = module.get_BINPROVIDERS()
except AttributeError:
pass
try:
extra_info['EXTRACTORS'] = module.get_EXTRACTORS()
except AttributeError:
pass
try:
extra_info['SEARCHBACKENDS'] = module.get_SEARCHBACKENDS()
except AttributeError:
pass
return benedict(extra_info)
# def get_HOOKS(PLUGINS) -> Dict[str, 'BaseHook']:
# return benedict({
# hook.id: hook
# for plugin in PLUGINS.values()
# for hook in plugin.hooks
# })
def get_PLUGIN(plugin_id: str) -> Dict[str, Any]:
plugin_info = get_PLUGINS().get(plugin_id, {})
package = plugin_info.get('package', plugin_info.get('PACKAGE', None))
if not package:
return {'id': plugin_id, 'hooks': {}}
module = importlib.import_module(package)
hooks = abx.get_plugin_hooks(module.__package__)
assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks)
return benedict({
'id': plugin_id,
'label': getattr(module, '__label__', plugin_id),
'module': module,
'package': module.__package__,
'hooks': hooks,
'version': getattr(module, '__version__', '999.999.999'),
'author': getattr(module, '__author__', 'Unknown'),
'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'),
'dependencies': getattr(module, '__dependencies__', []),
'source_code': module.__file__,
**plugin_info,
})
def get_HOOKS() -> Set[str]:
return {
hook_name
for plugin_id in get_PLUGINS().keys()
for hook_name in get_PLUGIN(plugin_id).hooks
}
def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
return benedict({
@@ -77,7 +63,8 @@ def get_CONFIGS() -> Dict[str, 'BaseConfigSet']:
for plugin_configs in pm.hook.get_CONFIG()
for config_id, configset in plugin_configs.items()
})
def get_FLAT_CONFIG() -> Dict[str, Any]:
return benedict({
key: value
@@ -141,28 +128,3 @@ def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']:
for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS()
for searchbackend_id,searchbackend in plugin_searchbackends.items()
})
###########################
# def extract(url_or_snapshot_id):
# from core.models import Snapshot
# url, snapshot_abid, snapshot_id = None, None, None
# snapshot = None
# if '://' in url_or_snapshot_id:
# url = url_or_snapshot_id
# try:
# snapshot = Snapshot.objects.get(url=url)
# except Snapshot.DoesNotExist:
# snapshot = Snapshot(url=url_or_snapshot_id, timestamp=str(timezone.now().timestamp()), bookmarked_at=timezone.now())
# snapshot.save()
# elif '-' in url_or_snapshot_id:
# snapshot_id = url_or_snapshot_id
# snapshot = Snapshot.objects.get(id=snapshot_id)
# else:
# snapshot_abid = url_or_snapshot_id
# snapshot = Snapshot.objects.get(abid=snapshot_abid)
# return pm.hook.extract(snapshot_id=snapshot.id)
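Note: a rough sketch of how the reworked get_PLUGIN() might be consumed after this change, based only on the fields built above and on how the admin views further down call plugin.hooks (the printed fields are illustrative):

import abx.archivebox.reads

for plugin_id in abx.archivebox.reads.get_PLUGINS().keys():
    plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)   # benedict: id, label, package, version, hooks, ...
    print(plugin.id, plugin.version, plugin.package)

    # plugin.hooks maps hook names to the callables registered by the plugin's module,
    # so availability has to be checked before calling (as the admin views below do)
    if plugin.hooks.get('get_BINARIES'):
        for binary in plugin.hooks.get_BINARIES().values():
            print('  provides binary:', binary.name)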

View file

@@ -0,0 +1,10 @@
__package__ = 'abx.archivebox'
import importlib
from typing import Dict, Set, Any, TYPE_CHECKING
from benedict import benedict
import abx
from .. import pm

View file

@@ -14,7 +14,7 @@ from django.utils.html import format_html, mark_safe
from admin_data_views.typing import TableContext, ItemContext
from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink
import abx.archivebox.use
import abx.archivebox.reads
from archivebox.config import CONSTANTS
from archivebox.misc.util import parse_date
@@ -85,10 +85,12 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
if '_BINARY' in key or '_VERSION' in key
}
for plugin_id in abx.archivebox.use.get_PLUGINS().keys():
plugin = abx.archivebox.use.get_PLUGIN(plugin_id)
for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items():
plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
if not plugin.hooks.get('get_BINARIES'):
continue
for binary in plugin.BINARIES.values():
for binary in plugin.hooks.get_BINARIES().values():
try:
installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary)
binary = installed_binary.load_from_db()
@@ -97,7 +99,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext:
rows['Binary Name'].append(ItemLink(binary.name, key=binary.name))
rows['Found Version'].append(f'{binary.loaded_version}' if binary.loaded_version else '❌ missing')
rows['From Plugin'].append(plugin.PACKAGE)
rows['From Plugin'].append(plugin.package)
rows['Provided By'].append(
', '.join(
f'[{binprovider.name}]' if binprovider.name == getattr(binary.loaded_binprovider, 'name', None) else binprovider.name
@@ -133,12 +135,16 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
binary = None
plugin = None
for plugin_id in abx.archivebox.use.get_PLUGINS().keys():
loaded_plugin = abx.archivebox.use.get_PLUGIN(plugin_id)
for loaded_binary in loaded_plugin.BINARIES.values():
if loaded_binary.name == key:
binary = loaded_binary
plugin = loaded_plugin
for plugin_id in abx.archivebox.reads.get_PLUGINS().keys():
loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
try:
for loaded_binary in loaded_plugin.hooks.get_BINARIES().values():
if loaded_binary.name == key:
binary = loaded_binary
plugin = loaded_plugin
# break # last write wins
except Exception as e:
print(e)
assert plugin and binary, f'Could not find a binary matching the specified name: {key}'
@@ -155,7 +161,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
"name": binary.name,
"description": binary.abspath,
"fields": {
'plugin': plugin.PACKAGE,
'plugin': plugin.package,
'binprovider': binary.loaded_binprovider,
'abspath': binary.loaded_abspath,
'version': binary.loaded_version,
@@ -187,27 +193,52 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext:
# "Search Backends": [],
}
config_colors = {
'_BINARY': '#339',
'USE_': 'green',
'SAVE_': 'green',
'_ARGS': '#33e',
'KEY': 'red',
'COOKIES': 'red',
'AUTH': 'red',
'SECRET': 'red',
'TOKEN': 'red',
'PASSWORD': 'red',
'TIMEOUT': '#533',
'RETRIES': '#533',
'MAX': '#533',
'MIN': '#533',
}
def get_color(key):
for pattern, color in config_colors.items():
if pattern in key:
return color
return 'black'
for plugin_id in settings.PLUGINS.keys():
plugin = abx.archivebox.use.get_PLUGIN(plugin_id)
rows['Label'].append(mark_safe(f'<a href="{plugin.HOMEPAGE}" target="_blank">{plugin.LABEL}</a>'))
rows['Version'].append(str(plugin.VERSION))
rows['Author'].append(str(plugin.AUTHOR))
rows['Package'].append(ItemLink(plugin.PACKAGE, key=plugin.PACKAGE))
rows['Source Code'].append(format_html('<code>{}</code>', str(plugin.SOURCE_PATH).replace(str(Path('~').expanduser()), '~')))
plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {})
plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {})
plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {})
rows['Label'].append(ItemLink(plugin.label, key=plugin.package))
rows['Version'].append(str(plugin.version))
rows['Author'].append(mark_safe(f'<a href="{plugin.homepage}" target="_blank">{plugin.author}</a>'))
rows['Package'].append(ItemLink(plugin.package, key=plugin.package))
rows['Source Code'].append(format_html('<code>{}</code>', str(plugin.source_code).replace(str(Path('~').expanduser()), '~')))
rows['Config'].append(mark_safe(''.join(
f'<a href="/admin/environment/config/{key}/"><b><code>{key}</code></b>=<code>{value}</code></a><br/>'
for key, value in plugin.CONFIG.model_dump().items()
f'<a href="/admin/environment/config/{key}/"><b><code style="color: {get_color(key)};">{key}</code></b>=<code>{value}</code></a><br/>'
for configdict in plugin.hooks.get_CONFIG().values()
for key, value in benedict(configdict).items()
)))
rows['Binaries'].append(mark_safe(', '.join(
f'<a href="/admin/environment/binaries/{binary.name}/"><code>{binary.name}</code></a>'
for binary in plugin.BINARIES.values()
for binary in plugin.hooks.get_BINARIES().values()
)))
rows['Package Managers'].append(mark_safe(', '.join(
f'<a href="/admin/environment/binproviders/{binprovider.name}/"><code>{binprovider.name}</code></a>'
for binprovider in plugin.BINPROVIDERS.values()
for binprovider in plugin.hooks.get_BINPROVIDERS().values()
)))
# rows['Search Backends'].append(mark_safe(', '.join(
# f'<a href="/admin/environment/searchbackends/{searchbackend.name}/"><code>{searchbackend.name}</code></a>'
@@ -224,30 +255,33 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext:
assert request.user.is_superuser, 'Must be a superuser to view configuration settings.'
plugin = None
for plugin_id, loaded_plugin in settings.PLUGINS.items():
if loaded_plugin.PACKAGE == key or plugin_id == key:
plugin = loaded_plugin
plugin_id = None
for check_plugin_id, loaded_plugin in settings.PLUGINS.items():
if check_plugin_id.split('.')[-1] == key.split('.')[-1]:
plugin_id = check_plugin_id
break
assert plugin, f'Could not find a plugin matching the specified name: {key}'
assert plugin_id, f'Could not find a plugin matching the specified name: {key}'
try:
plugin = plugin.load_binaries()
except Exception as e:
print(e)
plugin = abx.archivebox.reads.get_PLUGIN(plugin_id)
return ItemContext(
slug=key,
title=key,
data=[
{
"name": plugin.PACKAGE,
"description": plugin.LABEL,
"name": plugin.package,
"description": plugin.label,
"fields": {
"version": plugin.VERSION,
"author": plugin.AUTHOR,
"homepage": plugin.HOMEPAGE,
"id": plugin.id,
"package": plugin.package,
"label": plugin.label,
"version": plugin.version,
"author": plugin.author,
"homepage": plugin.homepage,
"dependencies": getattr(plugin, 'DEPENDENCIES', []),
"source_code": plugin.source_code,
"hooks": plugin.hooks,
},
"help_texts": {
# TODO

View file

@@ -10,7 +10,7 @@ from django.utils.crypto import get_random_string
import abx
import abx.archivebox
import abx.archivebox.use
import abx.archivebox.reads
import abx.django.use
from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS
@@ -53,17 +53,17 @@ ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS}
# Load ArchiveBox plugins
PLUGIN_MANAGER = abx.pm
abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS)
PLUGINS = abx.archivebox.use.get_PLUGINS()
PLUGINS = abx.archivebox.reads.get_PLUGINS()
# Load ArchiveBox config from plugins
CONFIGS = abx.archivebox.use.get_CONFIGS()
CONFIG = FLAT_CONFIG = abx.archivebox.use.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.use.get_BINPROVIDERS()
BINARIES = abx.archivebox.use.get_BINARIES()
EXTRACTORS = abx.archivebox.use.get_EXTRACTORS()
SEARCHBACKENDS = abx.archivebox.use.get_SEARCHBACKENDS()
# REPLAYERS = abx.archivebox.use.get_REPLAYERS()
# ADMINDATAVIEWS = abx.archivebox.use.get_ADMINDATAVIEWS()
CONFIGS = abx.archivebox.reads.get_CONFIGS()
CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG()
BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS()
BINARIES = abx.archivebox.reads.get_BINARIES()
EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS()
SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS()
# REPLAYERS = abx.archivebox.reads.get_REPLAYERS()
# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS()
################################################################################
@@ -609,6 +609,6 @@ if DEBUG_REQUESTS_TRACKER:
abx.django.use.register_checks()
# abx.archivebox.use.register_all_hooks(globals())
# abx.archivebox.reads.register_all_hooks(globals())
# import ipdb; ipdb.set_trace()
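Note: as a usage sketch (not part of this diff), downstream code can either read the objects cached above at Django startup via settings, or re-query the plugin hooks live through the new reads module; both are assumed to return dict-like mappings, as the hunks above suggest:

from django.conf import settings
import abx.archivebox.reads

print(sorted(settings.BINARIES.keys()))                    # snapshot computed once in settings.py above
print(sorted(abx.archivebox.reads.get_BINARIES().keys()))  # live re-read from the plugin hook implementations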

View file

@@ -503,7 +503,7 @@ def find_config_section(key: str) -> str:
if key in CONSTANTS_CONFIG:
return 'CONSTANT'
matching_sections = [
section.id for section in settings.CONFIGS.values() if key in section.model_fields
section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields
]
section = matching_sections[0] if matching_sections else 'DYNAMIC'
return section
@@ -560,9 +560,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext:
# "Aliases": [],
}
for section in reversed(list(settings.CONFIGS.values())):
for section_id, section in reversed(list(settings.CONFIGS.items())):
for key, field in section.model_fields.items():
rows['Section'].append(section.id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '')
rows['Key'].append(ItemLink(key, key=key))
rows['Type'].append(format_html('<code>{}</code>', find_config_type(key)))
rows['Value'].append(mark_safe(f'<code>{getattr(section, key)}</code>') if key_is_safe(key) else '******** (redacted)')

View file

@@ -8,7 +8,8 @@ from django.db import models
from django.utils import timezone
from django.utils.functional import cached_property
import abx.archivebox.use
import abx.archivebox.reads
from abx.archivebox.base_binary import BaseBinary, BaseBinProvider
from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats
@@ -290,7 +291,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
if not hasattr(self, 'machine'):
self.machine = Machine.objects.current()
if not self.binprovider:
all_known_binproviders = list(abx.archivebox.use.get_BINPROVIDERS().values())
all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values())
binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True)
self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None
if not self.abspath:
@@ -304,7 +305,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
@cached_property
def BINARY(self) -> BaseBinary:
for binary in abx.archivebox.use.get_BINARIES().values():
for binary in abx.archivebox.reads.get_BINARIES().values():
if binary.name == self.name:
return binary
raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it')
@@ -312,7 +313,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats):
@cached_property
def BINPROVIDER(self) -> BaseBinProvider:
for binprovider in abx.archivebox.use.get_BINPROVIDERS().values():
for binprovider in abx.archivebox.reads.get_BINPROVIDERS().values():
if binprovider.name == self.binprovider:
return binprovider
raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})')

View file

@@ -6,7 +6,7 @@ from typing import List, Union
from django.db.models import QuerySet
from django.conf import settings
import abx.archivebox.use
import abx.archivebox.reads
from archivebox.index.schema import Link
from archivebox.misc.util import enforce_types
@@ -57,7 +57,7 @@ def get_indexable_content(results: QuerySet):
def import_backend():
for backend in abx.archivebox.use.get_SEARCHBACKENDS().values():
for backend in abx.archivebox.reads.get_SEARCHBACKENDS().values():
if backend.name == SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE:
return backend
raise Exception(f'Could not load {SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE} as search backend')
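Note: for illustration only, a hedged sketch of a call site for import_backend() after this change (the import path and the backend's search() method are assumptions, not shown in this diff):

from archivebox.search import import_backend   # assumed module path for the file above

backend = import_backend()            # resolves SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE to a backend
results = backend.search('example')   # hypothetical method name; the backend API is not shown here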