From 2c2d034d6d21e0b9b632659af8f81c94a8866c6b Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Fri, 23 Aug 2024 02:01:02 -0700 Subject: [PATCH] move to new vendoring fallback logic --- archivebox/config.py | 5 ++++ archivebox/core/forms.py | 2 +- archivebox/parsers/pocket_api.py | 2 +- archivebox/system.py | 3 +-- archivebox/util.py | 2 +- archivebox/vendor/__init__.py | 34 +++++++++++++++++++++++++++ archivebox/vendor/atomicwrites.py | 1 - archivebox/vendor/base32_crockford.py | 1 - archivebox/vendor/package-lock.json | 1 - archivebox/vendor/package.json | 1 - archivebox/vendor/pocket.py | 1 - archivebox/vendor/pydantic-pkgr | 1 + archivebox/vendor/taggit_utils.py | 1 - pyproject.toml | 15 ++++++++---- 14 files changed, 54 insertions(+), 16 deletions(-) delete mode 120000 archivebox/vendor/atomicwrites.py delete mode 120000 archivebox/vendor/base32_crockford.py delete mode 120000 archivebox/vendor/package-lock.json delete mode 120000 archivebox/vendor/package.json delete mode 120000 archivebox/vendor/pocket.py create mode 160000 archivebox/vendor/pydantic-pkgr delete mode 120000 archivebox/vendor/taggit_utils.py diff --git a/archivebox/config.py b/archivebox/config.py index de086304..3e1051aa 100644 --- a/archivebox/config.py +++ b/archivebox/config.py @@ -52,6 +52,11 @@ from .config_stubs import ( ConfigDefaultDict, ) +# load fallback libraries from vendor dir +from .vendor import load_vendored_libs +load_vendored_libs() + + ############################### Config Schema ################################## diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py index 193c0d05..3a64eb45 100644 --- a/archivebox/core/forms.py +++ b/archivebox/core/forms.py @@ -4,7 +4,7 @@ from django import forms from ..util import URL_REGEX from ..parsers import PARSERS -from ..vendor.taggit_utils import edit_string_for_tags, parse_tags +from taggit.utils import edit_string_for_tags, parse_tags PARSER_CHOICES = [ (parser_key, parser[0]) diff --git 
a/archivebox/parsers/pocket_api.py b/archivebox/parsers/pocket_api.py index eec4d73b..3415f35e 100644 --- a/archivebox/parsers/pocket_api.py +++ b/archivebox/parsers/pocket_api.py @@ -7,7 +7,7 @@ from typing import IO, Iterable, Optional from configparser import ConfigParser from pathlib import Path -from ..vendor.pocket import Pocket +from pocket import Pocket from ..index.schema import Link from ..util import enforce_types diff --git a/archivebox/system.py b/archivebox/system.py index bced0bac..58571000 100644 --- a/archivebox/system.py +++ b/archivebox/system.py @@ -11,13 +11,12 @@ from typing import Optional, Union, Set, Tuple from subprocess import _mswindows, PIPE, Popen, CalledProcessError, CompletedProcess, TimeoutExpired from crontab import CronTab -from .vendor.atomicwrites import atomic_write as lib_atomic_write +from atomicwrites import atomic_write as lib_atomic_write from .util import enforce_types, ExtendedEncoder from .config import PYTHON_BINARY, OUTPUT_PERMISSIONS, DIR_OUTPUT_PERMISSIONS, ENFORCE_ATOMIC_WRITES - def run(cmd, *args, input=None, capture_output=True, timeout=None, check=False, text=False, start_new_session=True, **kwargs): """Patched of subprocess.run to kill forked child subprocesses and fix blocking io making timeout=innefective Mostly copied from https://github.com/python/cpython/blob/master/Lib/subprocess.py diff --git a/archivebox/util.py b/archivebox/util.py index d9dd4dbf..2548a234 100644 --- a/archivebox/util.py +++ b/archivebox/util.py @@ -16,7 +16,7 @@ from datetime import datetime, timezone from dateparser import parse as dateparser from requests.exceptions import RequestException, ReadTimeout -from .vendor.base32_crockford import encode as base32_encode # type: ignore +from base32_crockford import encode as base32_encode # type: ignore from w3lib.encoding import html_body_declared_encoding, http_content_type_encoding from os.path import lexists from os import remove as remove_file diff --git 
a/archivebox/vendor/__init__.py b/archivebox/vendor/__init__.py
index e69de29b..e19c45af 100644
--- a/archivebox/vendor/__init__.py
+++ b/archivebox/vendor/__init__.py
@@ -0,0 +1,34 @@
+import sys
+import inspect
+import importlib
+from pathlib import Path
+
+VENDOR_DIR = Path(__file__).parent
+
+VENDORED_LIBS = {
+    # sys.path dir: library name
+    'python-atomicwrites': 'atomicwrites',
+    'django-taggit': 'taggit',
+    'pydantic-pkgr': 'pydantic_pkgr',
+    'pocket': 'pocket',
+    'base32-crockford': 'base32_crockford',
+}
+
+def load_vendored_libs():
+    """Import each lib in VENDORED_LIBS, retrying from its VENDOR_DIR subdir on ImportError; exit(1) if both fail."""
+    for lib_subdir, lib_name in VENDORED_LIBS.items():
+        lib_dir = VENDOR_DIR / lib_subdir
+        assert lib_dir.is_dir(), f'Expected vendor library {lib_name} could not be found in {lib_dir}'
+        # try whatever is already importable first; lib_dir is only appended to sys.path if that fails
+        try:
+            lib = importlib.import_module(lib_name)
+            # print(f"Successfully imported lib from environment {lib_name}: {inspect.getfile(lib)}")
+        except ImportError:
+            sys.path.append(str(lib_dir))
+            try:
+                lib = importlib.import_module(lib_name)
+                # print(f"Successfully imported lib from vendored fallback {lib_name}: {inspect.getfile(lib)}")
+            except ImportError as e:
+                print(f"Failed to import lib from environment or vendored fallback {lib_name}: {e}", file=sys.stderr)
+                sys.exit(1)
+
diff --git a/archivebox/vendor/atomicwrites.py b/archivebox/vendor/atomicwrites.py
deleted file mode 120000
index 73abfe4c..00000000
--- a/archivebox/vendor/atomicwrites.py
+++ /dev/null
@@ -1 +0,0 @@
-python-atomicwrites/atomicwrites/__init__.py
\ No newline at end of file
diff --git a/archivebox/vendor/base32_crockford.py b/archivebox/vendor/base32_crockford.py
deleted file mode 120000
index a5d9c64f..00000000
--- a/archivebox/vendor/base32_crockford.py
+++ /dev/null
@@ -1 +0,0 @@
-base32-crockford/base32_crockford.py
\ No newline at end of file
diff --git a/archivebox/vendor/package-lock.json b/archivebox/vendor/package-lock.json
deleted file mode 120000
index 322001ae..00000000
--- a/archivebox/vendor/package-lock.json
+++ /dev/null
@@ -1 +0,0 @@
-../../package-lock.json \ No newline at end of file diff --git a/archivebox/vendor/package.json b/archivebox/vendor/package.json deleted file mode 120000 index 138a42cd..00000000 --- a/archivebox/vendor/package.json +++ /dev/null @@ -1 +0,0 @@ -../../package.json \ No newline at end of file diff --git a/archivebox/vendor/pocket.py b/archivebox/vendor/pocket.py deleted file mode 120000 index 37352d27..00000000 --- a/archivebox/vendor/pocket.py +++ /dev/null @@ -1 +0,0 @@ -pocket/pocket.py \ No newline at end of file diff --git a/archivebox/vendor/pydantic-pkgr b/archivebox/vendor/pydantic-pkgr new file mode 160000 index 00000000..61cf4bf5 --- /dev/null +++ b/archivebox/vendor/pydantic-pkgr @@ -0,0 +1 @@ +Subproject commit 61cf4bf5db18c9ab374d5f947c41921c728dc731 diff --git a/archivebox/vendor/taggit_utils.py b/archivebox/vendor/taggit_utils.py deleted file mode 120000 index f36776db..00000000 --- a/archivebox/vendor/taggit_utils.py +++ /dev/null @@ -1 +0,0 @@ -django-taggit/taggit/utils.py \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index ebeccd59..66501c3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,12 +29,9 @@ dependencies = [ "croniter>=2.0.5", # for: archivebox schedule "ipython>=8.23.0", # for: archivebox shell # Extractor Dependencies - "yt-dlp>=2024.4.9", # for: media + "yt-dlp>=2024.8.6", # for: media # "playwright>=1.43.0; platform_machine != 'armv7l'", # WARNING: playwright doesn't have any sdist, causes trouble on build systems that refuse to install wheel-only packages - # TODO: add more extractors - # - gallery-dl - # - scihubdl - # - See Github issues for more... + "django-signal-webhooks>=0.3.0", "django-admin-data-views>=0.3.1", "ulid-py>=1.1.0", @@ -43,6 +40,14 @@ dependencies = [ "django-pydantic-field>=0.3.9", "django-jsonform>=2.22.0", "django-stubs>=5.0.2", + + # these can be safely omitted when installation subsystem does not provide these as packages (e.g. 
apt/debian) + # archivebox will automatically load fallback vendored copies bundled via archivebox/vendor/__init__.py + "pydantic-pkgr>=0.1.4", + "atomicwrites==1.4.0", + "pocket==0.3.7", + "django-taggit==1.3.0", + "base32-crockford==0.3.0", ] homepage = "https://github.com/ArchiveBox/ArchiveBox"