From 80d8a6b667c51bb8ada3c9899d4b9e6e342f5543 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Tue, 15 Oct 2024 01:03:01 -0700 Subject: [PATCH] split archivebox.use into archivebox.reads and archivebox.writes --- archivebox/abx/archivebox/base_extractor.py | 4 +- .../abx/archivebox/{use.py => reads.py} | 106 ++++++----------- archivebox/abx/archivebox/writes.py | 10 ++ archivebox/config/views.py | 108 ++++++++++++------ archivebox/core/settings.py | 22 ++-- archivebox/core/views.py | 6 +- archivebox/machine/models.py | 9 +- archivebox/search/__init__.py | 4 +- 8 files changed, 138 insertions(+), 131 deletions(-) rename archivebox/abx/archivebox/{use.py => reads.py} (60%) create mode 100644 archivebox/abx/archivebox/writes.py diff --git a/archivebox/abx/archivebox/base_extractor.py b/archivebox/abx/archivebox/base_extractor.py index df4ff6d6..81ea2200 100644 --- a/archivebox/abx/archivebox/base_extractor.py +++ b/archivebox/abx/archivebox/base_extractor.py @@ -195,8 +195,8 @@ class BaseExtractor: @cached_property def BINARY(self) -> BaseBinary: - import abx.archivebox.use - for binary in abx.archivebox.use.get_BINARIES().values(): + import abx.archivebox.reads + for binary in abx.archivebox.reads.get_BINARIES().values(): if binary.name == self.binary: return binary raise ValueError(f'Binary {self.binary} not found') diff --git a/archivebox/abx/archivebox/use.py b/archivebox/abx/archivebox/reads.py similarity index 60% rename from archivebox/abx/archivebox/use.py rename to archivebox/abx/archivebox/reads.py index 3da249fd..f2479b5b 100644 --- a/archivebox/abx/archivebox/use.py +++ b/archivebox/abx/archivebox/reads.py @@ -1,10 +1,11 @@ __package__ = 'abx.archivebox' import importlib -from typing import Dict, Any, TYPE_CHECKING +from typing import Dict, Set, Any, TYPE_CHECKING from benedict import benedict +import abx from .. import pm if TYPE_CHECKING: @@ -24,52 +25,37 @@ def get_PLUGINS() -> Dict[str, Dict[str, Any]]: for plugin_dict in pm.hook.get_PLUGIN() for plugin_id, plugin in plugin_dict.items() }) - -def get_PLUGIN(plugin_id: str): - plugin_info = get_PLUGINS().get(plugin_id, {}) - assert plugin_info and getattr(plugin_info, 'PACKAGE', None), f'Plugin {plugin_id} not found' - - module = importlib.import_module(plugin_info['PACKAGE']) - extra_info ={ - 'ID': plugin_id, - 'id': plugin_id, - **plugin_info, - 'SOURCE_PATH': module.__file__, - 'MODULE': module, - 'CONFIG': {}, - 'BINARIES': {}, - 'BINPROVIDERS': {}, - 'EXTRACTORS': {}, - 'SEARCHBACKENDS': {}, - } - try: - extra_info['CONFIG'] = module.get_CONFIG()[plugin_id] - except AttributeError: - pass - try: - extra_info['BINARIES'] = module.get_BINARIES() - except AttributeError: - pass - try: - extra_info['BINPROVIDERS'] = module.get_BINPROVIDERS() - except AttributeError: - pass - try: - extra_info['EXTRACTORS'] = module.get_EXTRACTORS() - except AttributeError: - pass - try: - extra_info['SEARCHBACKENDS'] = module.get_SEARCHBACKENDS() - except AttributeError: - pass - return benedict(extra_info) -# def get_HOOKS(PLUGINS) -> Dict[str, 'BaseHook']: -# return benedict({ -# hook.id: hook -# for plugin in PLUGINS.values() -# for hook in plugin.hooks -# }) +def get_PLUGIN(plugin_id: str) -> Dict[str, Any]: + plugin_info = get_PLUGINS().get(plugin_id, {}) + package = plugin_info.get('package', plugin_info.get('PACKAGE', None)) + if not package: + return {'id': plugin_id, 'hooks': {}} + module = importlib.import_module(package) + hooks = abx.get_plugin_hooks(module.__package__) + assert plugin_info and (plugin_info.get('id') or plugin_info.get('ID') or hooks) + + return benedict({ + 'id': plugin_id, + 'label': getattr(module, '__label__', plugin_id), + 'module': module, + 'package': module.__package__, + 'hooks': hooks, + 'version': getattr(module, '__version__', '999.999.999'), + 'author': getattr(module, '__author__', 'Unknown'), + 'homepage': getattr(module, '__homepage__', 'https://github.com/ArchiveBox/ArchiveBox'), + 'dependencies': getattr(module, '__dependencies__', []), + 'source_code': module.__file__, + **plugin_info, + }) + + +def get_HOOKS() -> Set[str]: + return { + hook_name + for plugin_id in get_PLUGINS().keys() + for hook_name in get_PLUGIN(plugin_id).hooks + } def get_CONFIGS() -> Dict[str, 'BaseConfigSet']: return benedict({ @@ -77,7 +63,8 @@ def get_CONFIGS() -> Dict[str, 'BaseConfigSet']: for plugin_configs in pm.hook.get_CONFIG() for config_id, configset in plugin_configs.items() }) - + + def get_FLAT_CONFIG() -> Dict[str, Any]: return benedict({ key: value @@ -141,28 +128,3 @@ def get_SEARCHBACKENDS() -> Dict[str, 'BaseSearchBackend']: for plugin_searchbackends in pm.hook.get_SEARCHBACKENDS() for searchbackend_id,searchbackend in plugin_searchbackends.items() }) - - -########################### - - -# def extract(url_or_snapshot_id): -# from core.models import Snapshot - -# url, snapshot_abid, snapshot_id = None, None, None -# snapshot = None -# if '://' in url_or_snapshot_id: -# url = url_or_snapshot_id -# try: -# snapshot = Snapshot.objects.get(url=url) -# except Snapshot.DoesNotExist: -# snapshot = Snapshot(url=url_or_snapshot_id, timestamp=str(timezone.now().timestamp()), bookmarked_at=timezone.now()) -# snapshot.save() -# elif '-' in url_or_snapshot_id: -# snapshot_id = url_or_snapshot_id -# snapshot = Snapshot.objects.get(id=snapshot_id) -# else: -# snapshot_abid = url_or_snapshot_id -# snapshot = Snapshot.objects.get(abid=snapshot_abid) - -# return pm.hook.extract(snapshot_id=snapshot.id) diff --git a/archivebox/abx/archivebox/writes.py b/archivebox/abx/archivebox/writes.py new file mode 100644 index 00000000..78c1e098 --- /dev/null +++ b/archivebox/abx/archivebox/writes.py @@ -0,0 +1,10 @@ +__package__ = 'abx.archivebox' + +import importlib +from typing import Dict, Set, Any, TYPE_CHECKING + +from benedict import benedict + +import abx +from .. import pm + diff --git a/archivebox/config/views.py b/archivebox/config/views.py index cbafb3a6..db2c7eaa 100644 --- a/archivebox/config/views.py +++ b/archivebox/config/views.py @@ -14,7 +14,7 @@ from django.utils.html import format_html, mark_safe from admin_data_views.typing import TableContext, ItemContext from admin_data_views.utils import render_with_table_view, render_with_item_view, ItemLink -import abx.archivebox.use +import abx.archivebox.reads from archivebox.config import CONSTANTS from archivebox.misc.util import parse_date @@ -85,10 +85,12 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: if '_BINARY' in key or '_VERSION' in key } - for plugin_id in abx.archivebox.use.get_PLUGINS().keys(): - plugin = abx.archivebox.use.get_PLUGIN(plugin_id) + for plugin_id, plugin in abx.archivebox.reads.get_PLUGINS().items(): + plugin = abx.archivebox.reads.get_PLUGIN(plugin_id) + if not plugin.hooks.get('get_BINARIES'): + continue - for binary in plugin.BINARIES.values(): + for binary in plugin.hooks.get_BINARIES().values(): try: installed_binary = InstalledBinary.objects.get_from_db_or_cache(binary) binary = installed_binary.load_from_db() @@ -97,7 +99,7 @@ def binaries_list_view(request: HttpRequest, **kwargs) -> TableContext: rows['Binary Name'].append(ItemLink(binary.name, key=binary.name)) rows['Found Version'].append(f'✅ {binary.loaded_version}' if binary.loaded_version else '❌ missing') - rows['From Plugin'].append(plugin.PACKAGE) + rows['From Plugin'].append(plugin.package) rows['Provided By'].append( ', '.join( f'[{binprovider.name}]' if binprovider.name == getattr(binary.loaded_binprovider, 'name', None) else binprovider.name @@ -133,12 +135,16 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: binary = None plugin = None - for plugin_id in abx.archivebox.use.get_PLUGINS().keys(): - loaded_plugin = abx.archivebox.use.get_PLUGIN(plugin_id) - for loaded_binary in loaded_plugin.BINARIES.values(): - if loaded_binary.name == key: - binary = loaded_binary - plugin = loaded_plugin + for plugin_id in abx.archivebox.reads.get_PLUGINS().keys(): + loaded_plugin = abx.archivebox.reads.get_PLUGIN(plugin_id) + try: + for loaded_binary in loaded_plugin.hooks.get_BINARIES().values(): + if loaded_binary.name == key: + binary = loaded_binary + plugin = loaded_plugin + # break # last write wins + except Exception as e: + print(e) assert plugin and binary, f'Could not find a binary matching the specified name: {key}' @@ -155,7 +161,7 @@ def binary_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: "name": binary.name, "description": binary.abspath, "fields": { - 'plugin': plugin.PACKAGE, + 'plugin': plugin.package, 'binprovider': binary.loaded_binprovider, 'abspath': binary.loaded_abspath, 'version': binary.loaded_version, @@ -187,27 +193,52 @@ def plugins_list_view(request: HttpRequest, **kwargs) -> TableContext: # "Search Backends": [], } + config_colors = { + '_BINARY': '#339', + 'USE_': 'green', + 'SAVE_': 'green', + '_ARGS': '#33e', + 'KEY': 'red', + 'COOKIES': 'red', + 'AUTH': 'red', + 'SECRET': 'red', + 'TOKEN': 'red', + 'PASSWORD': 'red', + 'TIMEOUT': '#533', + 'RETRIES': '#533', + 'MAX': '#533', + 'MIN': '#533', + } + def get_color(key): + for pattern, color in config_colors.items(): + if pattern in key: + return color + return 'black' for plugin_id in settings.PLUGINS.keys(): - plugin = abx.archivebox.use.get_PLUGIN(plugin_id) - - rows['Label'].append(mark_safe(f'{plugin.LABEL}')) - rows['Version'].append(str(plugin.VERSION)) - rows['Author'].append(str(plugin.AUTHOR)) - rows['Package'].append(ItemLink(plugin.PACKAGE, key=plugin.PACKAGE)) - rows['Source Code'].append(format_html('{}', str(plugin.SOURCE_PATH).replace(str(Path('~').expanduser()), '~'))) + plugin = abx.archivebox.reads.get_PLUGIN(plugin_id) + plugin.hooks.get_BINPROVIDERS = plugin.hooks.get('get_BINPROVIDERS', lambda: {}) + plugin.hooks.get_BINARIES = plugin.hooks.get('get_BINARIES', lambda: {}) + plugin.hooks.get_CONFIG = plugin.hooks.get('get_CONFIG', lambda: {}) + + rows['Label'].append(ItemLink(plugin.label, key=plugin.package)) + rows['Version'].append(str(plugin.version)) + rows['Author'].append(mark_safe(f'{plugin.author}')) + rows['Package'].append(ItemLink(plugin.package, key=plugin.package)) + rows['Source Code'].append(format_html('{}', str(plugin.source_code).replace(str(Path('~').expanduser()), '~'))) rows['Config'].append(mark_safe(''.join( - f'{key}={value}
' - for key, value in plugin.CONFIG.model_dump().items() + f'{key}={value}
' + for configdict in plugin.hooks.get_CONFIG().values() + for key, value in benedict(configdict).items() ))) rows['Binaries'].append(mark_safe(', '.join( f'{binary.name}' - for binary in plugin.BINARIES.values() + for binary in plugin.hooks.get_BINARIES().values() ))) rows['Package Managers'].append(mark_safe(', '.join( f'{binprovider.name}' - for binprovider in plugin.BINPROVIDERS.values() + for binprovider in plugin.hooks.get_BINPROVIDERS().values() ))) # rows['Search Backends'].append(mark_safe(', '.join( # f'{searchbackend.name}' @@ -224,30 +255,33 @@ def plugin_detail_view(request: HttpRequest, key: str, **kwargs) -> ItemContext: assert request.user.is_superuser, 'Must be a superuser to view configuration settings.' - plugin = None - for plugin_id, loaded_plugin in settings.PLUGINS.items0(): - if loaded_plugin.PACKAGE == key or plugin_id == key: - plugin = loaded_plugin + plugin_id = None + for check_plugin_id, loaded_plugin in settings.PLUGINS.items(): + if check_plugin_id.split('.')[-1] == key.split('.')[-1]: + plugin_id = check_plugin_id + break - assert plugin, f'Could not find a plugin matching the specified name: {key}' + assert plugin_id, f'Could not find a plugin matching the specified name: {key}' - try: - plugin = plugin.load_binaries() - except Exception as e: - print(e) + plugin = abx.archivebox.reads.get_PLUGIN(plugin_id) return ItemContext( slug=key, title=key, data=[ { - "name": plugin.PACKAGE, - "description": plugin.LABEL, + "name": plugin.package, + "description": plugin.label, "fields": { - "version": plugin.VERSION, - "author": plugin.AUTHOR, - "homepage": plugin.HOMEPAGE, + "id": plugin.id, + "package": plugin.package, + "label": plugin.label, + "version": plugin.version, + "author": plugin.author, + "homepage": plugin.homepage, "dependencies": getattr(plugin, 'DEPENDENCIES', []), + "source_code": plugin.source_code, + "hooks": plugin.hooks, }, "help_texts": { # TODO diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py index 5f007bb5..5ec0b7e8 100644 --- a/archivebox/core/settings.py +++ b/archivebox/core/settings.py @@ -10,7 +10,7 @@ from django.utils.crypto import get_random_string import abx import abx.archivebox -import abx.archivebox.use +import abx.archivebox.reads import abx.django.use from archivebox.config import DATA_DIR, PACKAGE_DIR, ARCHIVE_DIR, CONSTANTS @@ -53,17 +53,17 @@ ALL_PLUGINS = {**BUILTIN_PLUGINS, **PIP_PLUGINS, **USER_PLUGINS} # Load ArchiveBox plugins PLUGIN_MANAGER = abx.pm abx.archivebox.load_archivebox_plugins(PLUGIN_MANAGER, ALL_PLUGINS) -PLUGINS = abx.archivebox.use.get_PLUGINS() +PLUGINS = abx.archivebox.reads.get_PLUGINS() # Load ArchiveBox config from plugins -CONFIGS = abx.archivebox.use.get_CONFIGS() -CONFIG = FLAT_CONFIG = abx.archivebox.use.get_FLAT_CONFIG() -BINPROVIDERS = abx.archivebox.use.get_BINPROVIDERS() -BINARIES = abx.archivebox.use.get_BINARIES() -EXTRACTORS = abx.archivebox.use.get_EXTRACTORS() -SEARCHBACKENDS = abx.archivebox.use.get_SEARCHBACKENDS() -# REPLAYERS = abx.archivebox.use.get_REPLAYERS() -# ADMINDATAVIEWS = abx.archivebox.use.get_ADMINDATAVIEWS() +CONFIGS = abx.archivebox.reads.get_CONFIGS() +CONFIG = FLAT_CONFIG = abx.archivebox.reads.get_FLAT_CONFIG() +BINPROVIDERS = abx.archivebox.reads.get_BINPROVIDERS() +BINARIES = abx.archivebox.reads.get_BINARIES() +EXTRACTORS = abx.archivebox.reads.get_EXTRACTORS() +SEARCHBACKENDS = abx.archivebox.reads.get_SEARCHBACKENDS() +# REPLAYERS = abx.archivebox.reads.get_REPLAYERS() +# ADMINDATAVIEWS = abx.archivebox.reads.get_ADMINDATAVIEWS() ################################################################################ @@ -609,6 +609,6 @@ if DEBUG_REQUESTS_TRACKER: abx.django.use.register_checks() -# abx.archivebox.use.register_all_hooks(globals()) +# abx.archivebox.reads.register_all_hooks(globals()) # import ipdb; ipdb.set_trace() diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 1ffa6cd3..d423c146 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -503,7 +503,7 @@ def find_config_section(key: str) -> str: if key in CONSTANTS_CONFIG: return 'CONSTANT' matching_sections = [ - section.id for section in settings.CONFIGS.values() if key in section.model_fields + section_id for section_id, section in settings.CONFIGS.items() if key in section.model_fields ] section = matching_sections[0] if matching_sections else 'DYNAMIC' return section @@ -560,9 +560,9 @@ def live_config_list_view(request: HttpRequest, **kwargs) -> TableContext: # "Aliases": [], } - for section in reversed(list(settings.CONFIGS.values())): + for section_id, section in reversed(list(settings.CONFIGS.items())): for key, field in section.model_fields.items(): - rows['Section'].append(section.id) # section.replace('_', ' ').title().replace(' Config', '') + rows['Section'].append(section_id) # section.replace('_', ' ').title().replace(' Config', '') rows['Key'].append(ItemLink(key, key=key)) rows['Type'].append(format_html('{}', find_config_type(key))) rows['Value'].append(mark_safe(f'{getattr(section, key)}') if key_is_safe(key) else '******** (redacted)') diff --git a/archivebox/machine/models.py b/archivebox/machine/models.py index e8cf3a2c..229e1d83 100644 --- a/archivebox/machine/models.py +++ b/archivebox/machine/models.py @@ -8,7 +8,8 @@ from django.db import models from django.utils import timezone from django.utils.functional import cached_property -import abx.archivebox.use +import abx.archivebox.reads + from abx.archivebox.base_binary import BaseBinary, BaseBinProvider from archivebox.abid_utils.models import ABIDModel, ABIDField, AutoDateTimeField, ModelWithHealthStats @@ -290,7 +291,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): if not hasattr(self, 'machine'): self.machine = Machine.objects.current() if not self.binprovider: - all_known_binproviders = list(abx.archivebox.use.get_BINPROVIDERS().values()) + all_known_binproviders = list(abx.archivebox.reads.get_BINPROVIDERS().values()) binary = BaseBinary(name=self.name, binproviders=all_known_binproviders).load(fresh=True) self.binprovider = binary.loaded_binprovider.name if binary.loaded_binprovider else None if not self.abspath: @@ -304,7 +305,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): @cached_property def BINARY(self) -> BaseBinary: - for binary in abx.archivebox.use.get_BINARIES().values(): + for binary in abx.archivebox.reads.get_BINARIES().values(): if binary.name == self.name: return binary raise Exception(f'Orphaned InstalledBinary {self.name} {self.binprovider} was found in DB, could not find any plugin that defines it') @@ -312,7 +313,7 @@ class InstalledBinary(ABIDModel, ModelWithHealthStats): @cached_property def BINPROVIDER(self) -> BaseBinProvider: - for binprovider in abx.archivebox.use.get_BINPROVIDERS().values(): + for binprovider in abx.archivebox.reads.get_BINPROVIDERS().values(): if binprovider.name == self.binprovider: return binprovider raise Exception(f'Orphaned InstalledBinary(name={self.name}) was found in DB, could not find any plugin that defines BinProvider(name={self.binprovider})') diff --git a/archivebox/search/__init__.py b/archivebox/search/__init__.py index f7394171..2e7d4f69 100644 --- a/archivebox/search/__init__.py +++ b/archivebox/search/__init__.py @@ -6,7 +6,7 @@ from typing import List, Union from django.db.models import QuerySet from django.conf import settings -import abx.archivebox.use +import abx.archivebox.reads from archivebox.index.schema import Link from archivebox.misc.util import enforce_types @@ -57,7 +57,7 @@ def get_indexable_content(results: QuerySet): def import_backend(): - for backend in abx.archivebox.use.get_SEARCHBACKENDS().values(): + for backend in abx.archivebox.reads.get_SEARCHBACKENDS().values(): if backend.name == SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE: return backend raise Exception(f'Could not load {SEARCH_BACKEND_CONFIG.SEARCH_BACKEND_ENGINE} as search backend')