diff --git a/.github/ISSUE_TEMPLATE/1-bug_report.yml b/.github/ISSUE_TEMPLATE/1-bug_report.yml new file mode 100644 index 00000000..0bf95ef0 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/1-bug_report.yml @@ -0,0 +1,198 @@ +name: 🐞 Bug report +description: Report a bug or error you encountered in ArchiveBox +title: "Bug: ..." +assignees: + - pirate +type: 'Bug' +body: + - type: markdown + attributes: + value: | + *Please note:* it is normal to see errors occasionally for some extractors on some URLs (not every extractor will work on every type of page). + Please report archiving errors if you are seeing them *consistently across many URLs* or if they are *preventing you from using ArchiveBox*. + + - type: textarea + id: description + attributes: + label: Provide a screenshot and describe the bug + description: | + Attach a screenshot and describe what the issue is, what you expected to happen, and if relevant, the *URLs you were trying to archive*. + placeholder: | + Got a bunch of 'singlefile was unable to archive this page' errors when trying to archive URLs from this site: https://example.com/xyz ... + I also tried to archive the same URLs using `singlefile` directly and some of them worked but not all of them. etc. ... + validations: + required: true + + - type: textarea + id: steps_to_reproduce + attributes: + label: Steps to reproduce + description: Please include the exact steps you took to trigger the issue. + render: markdown + placeholder: | + 1. Started ArchiveBox by running: `docker run -v $PWD:/data -p 8000:8000 archivebox/archivebox:latest` + 2. Went to the http://127.0.0.1:8000/add/ page in Google Chrome + 3. Typed 'https://example.com/xyz' into the 'Add URL' input field + 4. Clicked the 'Add+' button + 5. 
Got a 500 error and saw the errors below in terminal + validations: + required: true + + - type: textarea + id: logs + attributes: + label: Logs or errors + description: "Paste any terminal output, logs, or errors (check `data/logs/errors.log` as well)." + placeholder: | + ╭─────────────────────────────────────────────────────────────────────────────────────────────────────────╮ + │ [2024-11-02 19:54:28] ArchiveBox v0.8.6rc0: archivebox add https://example.com#1234567 │ + ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────╯ + + [+] [2024-11-02 19:54:29] Adding 1 links to index (crawl depth=0)... + > Saved verbatim input to sources/1730577269-import.txt + > Parsed 1 URLs from input (Generic TXT) + ... + render: shell + validations: + required: false + + - type: textarea + id: version + attributes: + label: ArchiveBox Version + description: | + **REQUIRED:** Run the `archivebox version` command inside your collection dir and paste the *full output* here (*not just the version number*). + For Docker Compose run: `docker compose run archivebox version` + For plain Docker run: `docker run -v $PWD:/data archivebox/archivebox version` + render: shell + placeholder: | + 0.8.6 + ArchiveBox v0.8.6rc0 COMMIT_HASH=721427a BUILD_TIME=2024-10-21 12:57:02 1729515422 + IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-15.1-arm64-arm-64bit PYTHON=Cpython (venv) + EUID=502:20 UID=502:20 PUID=502:20 FS_UID=502:20 FS_PERMS=644 FS_ATOMIC=True FS_REMOTE=False + DEBUG=False IS_TTY=True SUDO=False ID=dfa11485:aa78ad45 SEARCH_BACKEND=ripgrep LDAP=False + + Binary Dependencies: + √ python 3.11.9 venv_pip ~/.venv/bin/python + √ django 5.1.2 venv_pip ~/.venv/lib/python3.11/site-packages/django/__init__.py + √ sqlite 2.6.0 venv_pip ~/.venv/lib/python3.11/site-packages/django/db/backends/sqlite3/base.py + √ pip 24.3.1 venv_pip ~/.venv/bin/pip + ... 
+ validations: + required: true + + - type: dropdown + id: install_method + validations: + required: true + attributes: + label: How did you install the version of ArchiveBox you are using? + multiple: false + options: + - pip + - apt + - brew + - nix + - Docker (or other container system like podman/LXC/Kubernetes or TrueNAS/Cloudron/YunoHost/etc.) + - Other + + - type: dropdown + id: operating_system + validations: + required: true + attributes: + label: What operating system are you running on? + description: | + Please note we are *unable to provide support for Windows users* unless you are using [Docker on Windows](https://github.com/ArchiveBox/archivebox#:~:text=windows%20without%20docker). + multiple: false + options: + - Linux (Ubuntu/Debian/Arch/Alpine/etc.) + - macOS (including Docker on macOS) + - BSD (FreeBSD/OpenBSD/NetBSD/etc.) + - Windows (including WSL, WSL2, Docker Desktop on Windows) + - Other + + - type: checkboxes + id: filesystem + attributes: + label: What type of drive are you using to store your ArchiveBox data? + description: Are you using a [remote filesystem](https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-Up-Storage#supported-remote-filesystems) or FUSE mount for `data/` or `data/archive` (e.g. NFS/SMB/CIFS/etc. or FUSE/RClone/S3/B2/OneDrive/etc.)? + options: + - label: "`data/` is on a local SSD or NVMe drive" + required: false + - label: "`data/` is on a spinning hard drive or external USB drive" + required: false + - label: "`data/` is on a network mount (e.g. NFS/SMB/CIFS/etc.)" + required: false + - label: "`data/` is on a FUSE mount (e.g. SSHFS/RClone/S3/B2/OneDrive, etc.)" + required: false + + + - type: textarea + id: docker_compose_yml + attributes: + label: Docker Compose Configuration + description: "If using Docker Compose, please share your full `docker-compose.yml` file. If using plain Docker, paste the `docker run ...` command you use." 
+ placeholder: | + services: + archivebox: + image: archivebox/archivebox:latest + ports: + - 8000:8000 + volumes: + - ./data:/data + environment: + - ADMIN_USERNAME=admin + - ADMIN_PASSWORD=******** + - ALLOWED_HOSTS=* + - CSRF_TRUSTED_ORIGINS=https://archivebox.example.com + - PUBLIC_INDEX=True + - PUBLIC_SNAPSHOTS=True + - PUBLIC_ADD_VIEW=False + ... + + archivebox_scheduler: + image: archivebox/archivebox:latest + command: schedule --foreground --update --every=day + environment: + ... + + ... + render: shell + validations: + required: false + + - type: textarea + id: configuration + attributes: + label: ArchiveBox Configuration + description: "Please share your full `data/ArchiveBox.conf` file here." + render: shell + placeholder: | + [SERVER_CONFIG] + SECRET_KEY = "*********************" + + WGET_RESTRICT_FILE_NAMES=windows + USE_SYSTEM_WGET=true + CHECK_SSL_VALIDITY=false + ... + validations: + required: false + + + - type: markdown + attributes: + value: | + --- + + We strive to answer issues as quickly as possible, it usually takes us *about a ~week* to respond. + Make sure your `data/` is [**fully backed up**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#disk-layout) before trying anything suggested here, **we are not responsible for data loss**. 
+ + In the meantime please consider: + + - 💰 [Donating to support ArchiveBox open-source](https://github.com/ArchiveBox/ArchiveBox/wiki/Donations) + - 👨‍✈️ [Hiring us for corporate deployments](https://docs.monadical.com/s/archivebox-consulting-services) with professional support, custom feature development, and help with CAPTCHAs/rate-limits + - 🔍 [Searching the Documentation](https://docs.archivebox.io/) for answers to common questions + - 📚 Reading the [Troubleshooting Guide](https://github.com/ArchiveBox/ArchiveBox/wiki) + - ✨ Testing out a newer [`BETA` release](https://github.com/ArchiveBox/ArchiveBox/releases) (issues are often already fixed in our latest `BETA` releases) + diff --git a/.github/ISSUE_TEMPLATE/2-feature_request.yml b/.github/ISSUE_TEMPLATE/2-feature_request.yml new file mode 100644 index 00000000..def1b457 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/2-feature_request.yml @@ -0,0 +1,127 @@ +name: 💡 Feature or enhancement request +description: Suggest an idea or improvement for this project +title: "Feature Request: ..." +assignees: + - pirate +type: 'Enhancement' +labels: 'status: idea phase' +body: + - type: dropdown + id: suggestion_type + validations: + required: true + attributes: + label: "What type of suggestion are you making?" + multiple: false + options: + - New extractor / type of content to save + - Proposing a new feature + - Modification of existing behavior + - Web UI or UX design improvement + + - type: textarea + id: current_problem + attributes: + label: "What is the problem that your feature request solves?" + description: | + Describe the problem that your feature request solves, feel free to include any screenshots or examples. + placeholder: | + e.g. I need to be able to archive spanish and french subtitle files from a particular movie site https://example.com/somevideos that's going down soon. 
+ validations: + required: true + + - type: textarea + id: proposed_solution + attributes: + label: "What is your proposed solution?" + description: | + Describe the ideal specific solution you'd want, and whether it fits into any broader scope of changes. + placeholder: | + e.g. I specifically need a new archive method to look for multilingual subtitle files related to pages. + The bigger picture solution is the ability for custom user scripts to be run in a puppeteer context during archiving. + validations: + required: true + + - type: textarea + id: workarounds_tried + attributes: + label: "What hacks or alternative solutions have you tried to solve the problem?" + description: | + A clear and concise description of any alternative solutions, workarounds, or other software you've considered using to fix the problem. + placeholder: | + e.g. I wait for archivebox to finish archiving the page, then I manually run `yt-dlp --subs <url>` inside + the `data/archive/<timestamp>/` directory to download the subtitle files and add them to the snapshot folder. + validations: + required: true + + - type: textarea + id: version + attributes: + label: What version of ArchiveBox are you currently using? + description: | + We need to know what version of ArchiveBox and what OS you're currently using in order to contextualize your feature request. + Sometimes we've already fixed the issues in newer BETA versions, sometimes features already exist but may not be available in specific environments/versions. + + Run the `archivebox version` command inside your current collection dir and paste the *full output* here (*not just the version number*). 
+ For Docker Compose run: `docker compose run archivebox version` + For plain Docker run: `docker run -v $PWD:/data archivebox/archivebox version` + render: shell + placeholder: | + 0.8.6 + ArchiveBox v0.8.6rc0 COMMIT_HASH=721427a BUILD_TIME=2024-10-21 12:57:02 1729515422 + IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-15.1-arm64-arm-64bit PYTHON=Cpython (venv) + EUID=502:20 UID=502:20 PUID=502:20 FS_UID=502:20 FS_PERMS=644 FS_ATOMIC=True FS_REMOTE=False + DEBUG=False IS_TTY=True SUDO=False ID=dfa11485:aa78ad45 SEARCH_BACKEND=ripgrep LDAP=False + + Binary Dependencies: + √ python 3.11.9 venv_pip ~/.venv/bin/python + √ django 5.1.2 venv_pip ~/.venv/lib/python3.11/site-packages/django/__init__.py + √ sqlite 2.6.0 venv_pip ~/.venv/lib/python3.11/site-packages/django/db/backends/sqlite3/base.py + √ pip 24.3.1 venv_pip ~/.venv/bin/pip + ... + validations: + required: true + + - type: checkboxes + id: priority + attributes: + label: "How badly do you want this new feature?" + options: + - label: "It's an urgent deal-breaker, I can't live without it" + required: false + - label: "It's important to add it in the near-mid term future" + required: false + - label: "It would be nice to have eventually" + required: false + - label: "I'm willing to [work on a PR](https://github.com/ArchiveBox/ArchiveBox#archivebox-development) to develop this myself" + required: false + - label: "I have [donated money](https://github.com/ArchiveBox/ArchiveBox/wiki/Donations) to go towards fixing this issue" + required: false + + - type: checkboxes + id: satisfaction_survey + attributes: + label: Mini Survey + description: How do you like ArchiveBox so far? 
+ options: + - label: "I like ArchiveBox so far / would recommend it to a friend" + required: false + - label: "I've had a lot of difficulty getting ArchiveBox set up" + required: false + - label: "I would pay $10/mo for a hosted version of ArchiveBox if it had this feature" + required: false + + - type: markdown + attributes: + value: | + --- + + We strive to answer issues as quickly as possible, it usually takes us *about a ~week* to respond. + Make sure your `data/` is [**fully backed up**](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#disk-layout) before trying any workarounds or BETAs suggested here, **we are not responsible for data loss**. + + In the meantime please consider: + + - 💰 [Donating to support ArchiveBox open-source](https://github.com/ArchiveBox/ArchiveBox/wiki/Donations) + - 📊 [Hiring us for corporate deployments](https://docs.monadical.com/s/archivebox-consulting-services) with professional support, custom feature development, and help with CAPTCHAs/rate-limits + - 🔍 [Searching the Documentation](https://docs.archivebox.io/) for answers to common questions + - ✨ Testing out a newer [`BETA` release](https://github.com/ArchiveBox/ArchiveBox/releases) (issues are often already fixed in our latest `BETA` releases) diff --git a/.github/ISSUE_TEMPLATE/3-documentation_change.yml b/.github/ISSUE_TEMPLATE/3-documentation_change.yml new file mode 100644 index 00000000..c711f089 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/3-documentation_change.yml @@ -0,0 +1,52 @@ +name: 📑 Documentation improvement +description: Submit an idea or correction for the Wiki documentation +title: "Documentation: ..." +labels: 'touches: docs' +type: 'Enhancement' +assignees: + - pirate +body: + - type: markdown + attributes: + value: | + If you prefer, you can submit a [Pull Request](https://github.com/ArchiveBox/docs) on https://github.com/ArchiveBox/docs to edit the docs directly instead. 
+ + - type: input + id: page_url + validations: + required: true + attributes: + label: "What is the URL of the page you'd like to see improved?" + placeholder: e.g. https://github.com/ArchiveBox/docs/wiki/Install + + - type: input + id: section_title + validations: + required: true + attributes: + label: "What is the title of the relevant section?" + placeholder: e.g. Option B. Automatic Setup Script + + - type: textarea + id: suggested_edit + attributes: + label: "What is the suggested edit?" + placeholder: | + e.g. Please document how to run the automatic setup script for ArchiveBox on TempleOS. + Attach images, screenshots, code snippets, etc. anything you think would help. + validations: + required: true + + - type: markdown + attributes: + value: | + --- + + We strive to address issues as quickly as possible, it usually takes us *about a ~week* to respond. + + In the meantime please consider: + + - 💰 [Donating to support ArchiveBox open-source](https://github.com/ArchiveBox/ArchiveBox/wiki/Donations) + - 👨‍✈️ [Hiring us for corporate deployments](https://docs.monadical.com/s/archivebox-consulting-services) with professional support, custom feature development, and help with CAPTCHAs/rate-limits + - 🔍 [Checking out the new ReadTheDocs Documentation](https://docs.archivebox.io/) + - ✨ Helping us test a newer [`BETA` release](https://github.com/ArchiveBox/ArchiveBox/releases) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 086e3d7b..00000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -name: 🐞 Bug report -about: Create a report to help us improve -title: 'Bug: ...' 
-labels: 'bug' -assignees: '' - ---- - - - -#### Describe the bug - - -#### Steps to reproduce - - -#### Screenshots or log output - - - -#### ArchiveBox version - - -```logs -replace this line with the *full*, unshortened output of running `archivebox version` -``` - diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 00000000..3ac6473a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: 💬 Chat with us on Zulip + url: https://zulip.archivebox.io + about: "Join us on our Zulip forum to chat with the developers and other users (it's similar to Discord but self-hosted)." + - name: 💁‍♂️ Hire us to provide archiving for your organization + url: https://docs.monadical.com/s/archivebox-consulting-services + about: "We provide hosting, development, and support, including on-prem/cloud setup w/ SSO & storage, CAPTCHA/rate-limiting avoidance, etc." diff --git a/.github/ISSUE_TEMPLATE/documentation_change.md b/.github/ISSUE_TEMPLATE/documentation_change.md deleted file mode 100644 index 99b8775f..00000000 --- a/.github/ISSUE_TEMPLATE/documentation_change.md +++ /dev/null @@ -1,17 +0,0 @@ ---- -name: 📑 Documentation change -about: Submit a suggestion for the Wiki documentation -title: 'Documentation: Improvement request ...' -labels: '' -assignees: '' - ---- - - - -## Wiki Page URL - - - -## Suggested Edit - - diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 5378139f..00000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -name: 💡 Feature request -about: Suggest an idea for this project -title: 'Feature Request: ...' 
-labels: 'changes: behavior,status: idea phase' -assignees: '' - ---- - - - -## Type - - - [ ] General question or discussion - - [ ] Propose a brand new feature - - [ ] Request modification of existing behavior or design - -## What is the problem that your feature request solves - - -## Describe the ideal specific solution you'd want, and whether it fits into any broader scope of changes - - -## What hacks or alternative solutions have you tried to solve the problem? - - -## How badly do you want this new feature? - - - [ ] It's an urgent deal-breaker, I can't live without it - - [ ] It's important to add it in the near-mid term future - - [ ] It would be nice to have eventually - ---- - - - [ ] I'm willing to contribute [dev time](https://github.com/ArchiveBox/ArchiveBox#archivebox-development) / [money](https://github.com/sponsors/pirate) to fix this issue - - [ ] I like ArchiveBox so far / would recommend it to a friend - - [ ] I've had a lot of difficulty getting ArchiveBox set up diff --git a/.github/ISSUE_TEMPLATE/question_or_discussion.md b/.github/ISSUE_TEMPLATE/question_or_discussion.md deleted file mode 100644 index 4b7fb02f..00000000 --- a/.github/ISSUE_TEMPLATE/question_or_discussion.md +++ /dev/null @@ -1,9 +0,0 @@ ---- -name: 💬 Question, discussion, or support request -about: Start a discussion or ask a question about ArchiveBox -title: 'Question: ...' -labels: '' -assignees: '' - ---- - diff --git a/README.md b/README.md index bda15ae2..786b494f 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ Without active preservation effort, everything on the internet eventually disapp *ArchiveBox is an open source tool that lets organizations & individuals archive both public & private web content while retaining control over their data. It can be used to save copies of bookmarks, preserve evidence for legal cases, backup photos from FB/Insta/Flickr or media from YT/Soundcloud/etc., save research papers, and more...*
-> ➡️ Get ArchiveBox with `pip install archivebox` on [Linux](#quickstart), [macOS](#quickstart), and [Windows](#quickstart) (WSL2), or via **[Docker](#quickstart)** ⭐️. +> ➡️ Get ArchiveBox with `pip install archivebox` on [Linux](#quickstart)/[macOS](#quickstart), or via **[Docker](#quickstart)** ⭐️ on any OS. *Once installed, it can be used as a [CLI tool](#usage), [self-hosted Web App](https://github.com/ArchiveBox/ArchiveBox/wiki/Publishing-Your-Archive), [Python library](https://github.com/ArchiveBox/ArchiveBox/wiki/Usage#python-shell-usage), or [one-off command](#static-archive-exporting).* diff --git a/archivebox/abid_utils/models.py b/archivebox/abid_utils/models.py index f36bfcd9..5a9ab109 100644 --- a/archivebox/abid_utils/models.py +++ b/archivebox/abid_utils/models.py @@ -144,9 +144,10 @@ class ABIDModel(models.Model): allowed_to_invalidate_abid = self.abid_drift_allowed if (abid_drift_allowed is None) else abid_drift_allowed if allowed_to_invalidate_abid: - print(f'\n#### WARNING: Change allowed despite it invalidating the ABID of an existing record ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed})!', self.abid) - print(change_error) - print('--------------------------------------------------------------------------------------------------') + # print(f'\n#### WARNING: Change allowed despite it invalidating the ABID of an existing record ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed})!', self.abid) + # print(change_error) + # print('--------------------------------------------------------------------------------------------------') + pass else: print(f'\n#### ERROR: Change blocked because it would invalidate ABID of an existing record ({self.__class__.__name__}.abid_drift_allowed={self.abid_drift_allowed})', self.abid) print(change_error) diff --git a/archivebox/api/v1_api.py b/archivebox/api/v1_api.py index 7076f5d1..b989bc92 100644 --- a/archivebox/api/v1_api.py +++ b/archivebox/api/v1_api.py @@ -64,7 
+64,7 @@ class NinjaAPIWithIOCapture(NinjaAPI): # Add Auth Headers to response api_token = getattr(request, '_api_token', None) - token_expiry = api_token.expires.isoformat() if api_token else 'Never' + token_expiry = api_token.expires.isoformat() if api_token and api_token.expires else 'Never' response['X-ArchiveBox-Auth-Method'] = getattr(request, '_api_auth_method', None) or 'None' response['X-ArchiveBox-Auth-Expires'] = token_expiry diff --git a/archivebox/config/__init__.py b/archivebox/config/__init__.py index 55a76384..8513d682 100644 --- a/archivebox/config/__init__.py +++ b/archivebox/config/__init__.py @@ -9,4 +9,28 @@ from .paths import ( from .constants import CONSTANTS, CONSTANTS_CONFIG, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa from .version import VERSION # noqa +# import abx +# @abx.hookimpl +# def get_CONFIG(): +# from .common import ( +# SHELL_CONFIG, +# STORAGE_CONFIG, +# GENERAL_CONFIG, +# SERVER_CONFIG, +# ARCHIVING_CONFIG, +# SEARCH_BACKEND_CONFIG, +# ) +# return { +# 'SHELL_CONFIG': SHELL_CONFIG, +# 'STORAGE_CONFIG': STORAGE_CONFIG, +# 'GENERAL_CONFIG': GENERAL_CONFIG, +# 'SERVER_CONFIG': SERVER_CONFIG, +# 'ARCHIVING_CONFIG': ARCHIVING_CONFIG, +# 'SEARCHBACKEND_CONFIG': SEARCH_BACKEND_CONFIG, +# } + +# @abx.hookimpl +# def ready(): +# for config in get_CONFIG().values(): +# config.validate() diff --git a/archivebox/config/common.py b/archivebox/config/common.py index 238fcfac..ee6c438b 100644 --- a/archivebox/config/common.py +++ b/archivebox/config/common.py @@ -13,7 +13,7 @@ from django.utils.crypto import get_random_string from abx_spec_config.base_configset import BaseConfigSet from .constants import CONSTANTS -from .version import get_COMMIT_HASH, get_BUILD_TIME +from .version import get_COMMIT_HASH, get_BUILD_TIME, VERSION from .permissions import IN_DOCKER ###################### Config ########################## @@ -31,9 +31,6 @@ class ShellConfig(BaseConfigSet): ANSI: Dict[str, str] = Field(default=lambda c: 
CONSTANTS.DEFAULT_CLI_COLORS if c.USE_COLOR else CONSTANTS.DISABLED_CLI_COLORS) - # VERSIONS_AVAILABLE: bool = False # .check_for_update.get_versions_available_on_github(c)}, - # CAN_UPGRADE: bool = False # .check_for_update.can_upgrade(c)}, - @property def TERM_WIDTH(self) -> int: if not self.IS_TTY: @@ -47,6 +44,7 @@ class ShellConfig(BaseConfigSet): @property def BUILD_TIME(self) -> str: return get_BUILD_TIME() + SHELL_CONFIG = ShellConfig() @@ -76,7 +74,6 @@ STORAGE_CONFIG = StorageConfig() class GeneralConfig(BaseConfigSet): TAG_SEPARATOR_PATTERN: str = Field(default=r'[,]') - GENERAL_CONFIG = GeneralConfig() @@ -87,6 +84,7 @@ class ServerConfig(BaseConfigSet): CSRF_TRUSTED_ORIGINS: str = Field(default=lambda c: 'http://localhost:8000,http://127.0.0.1:8000,http://0.0.0.0:8000,http://{}'.format(c.BIND_ADDR)) SNAPSHOTS_PER_PAGE: int = Field(default=40) + PREVIEW_ORIGINALS: bool = Field(default=True) FOOTER_INFO: str = Field(default='Content is hosted for personal archiving purposes only. 
Contact server owner for any takedown requests.') # CUSTOM_TEMPLATES_DIR: Path = Field(default=None) # this is now a constant @@ -96,10 +94,10 @@ class ServerConfig(BaseConfigSet): ADMIN_USERNAME: str = Field(default=None) ADMIN_PASSWORD: str = Field(default=None) + REVERSE_PROXY_USER_HEADER: str = Field(default='Remote-User') REVERSE_PROXY_WHITELIST: str = Field(default='') LOGOUT_REDIRECT_URL: str = Field(default='/') - PREVIEW_ORIGINALS: bool = Field(default=True) SERVER_CONFIG = ServerConfig() @@ -113,7 +111,7 @@ class ArchivingConfig(BaseConfigSet): MEDIA_MAX_SIZE: str = Field(default='750m') RESOLUTION: str = Field(default='1440,2000') CHECK_SSL_VALIDITY: bool = Field(default=True) - USER_AGENT: str = Field(default='Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)') + USER_AGENT: str = Field(default=f'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 ArchiveBox/{VERSION} (+https://github.com/ArchiveBox/ArchiveBox/)') COOKIES_FILE: Path | None = Field(default=None) URL_DENYLIST: str = Field(default=r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$', alias='URL_BLACKLIST') @@ -131,17 +129,15 @@ class ArchivingConfig(BaseConfigSet): # CHROME_HEADLESS: bool = Field(default=True) # CHROME_SANDBOX: bool = Field(default=lambda: not SHELL_CONFIG.IN_DOCKER) - @field_validator('TIMEOUT', mode='after') - def validate_timeout(cls, v): - if int(v) < 5: - print(f'[red][!] Warning: TIMEOUT is set too low! (currently set to TIMEOUT={v} seconds)[/red]', file=sys.stderr) + def validate(self): + if int(self.TIMEOUT) < 5: + print(f'[red][!] Warning: TIMEOUT is set too low! 
(currently set to TIMEOUT={self.TIMEOUT} seconds)[/red]', file=sys.stderr) print(' You must allow *at least* 5 seconds for indexing and archive methods to run succesfully.', file=sys.stderr) print(' (Setting it to somewhere between 30 and 3000 seconds is recommended)', file=sys.stderr) print(file=sys.stderr) print(' If you want to make ArchiveBox run faster, disable specific archive methods instead:', file=sys.stderr) print(' https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#archive-method-toggles', file=sys.stderr) print(file=sys.stderr) - return v @field_validator('CHECK_SSL_VALIDITY', mode='after') def validate_check_ssl_validity(cls, v): @@ -164,15 +160,17 @@ class ArchivingConfig(BaseConfigSet): @property def SAVE_ALLOWLIST_PTNS(self) -> Dict[re.Pattern, List[str]]: return { - re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v - for k, v in self.SAVE_ALLOWLIST.items() + # regexp: methods list + re.compile(key, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): val + for key, val in self.SAVE_ALLOWLIST.items() } if self.SAVE_ALLOWLIST else {} @property def SAVE_DENYLIST_PTNS(self) -> Dict[re.Pattern, List[str]]: return { - re.compile(k, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): v - for k, v in self.SAVE_DENYLIST.items() + # regexp: methods list + re.compile(key, CONSTANTS.ALLOWDENYLIST_REGEX_FLAGS): val + for key, val in self.SAVE_DENYLIST.items() } if self.SAVE_DENYLIST else {} ARCHIVING_CONFIG = ArchivingConfig() diff --git a/archivebox/core/admin_snapshots.py b/archivebox/core/admin_snapshots.py index 2bd08421..84558632 100644 --- a/archivebox/core/admin_snapshots.py +++ b/archivebox/core/admin_snapshots.py @@ -13,7 +13,7 @@ from django.template import Template, RequestContext from django.contrib.admin.helpers import ActionForm from django.contrib.admin.widgets import FilteredSelectMultiple -from archivebox.config import DATA_DIR, VERSION +from archivebox.config import DATA_DIR from archivebox.config.common import SERVER_CONFIG from archivebox.misc.util import 
htmldecode, urldecode from archivebox.misc.paginators import AccelleratedPaginator @@ -32,8 +32,8 @@ from core.admin_tags import TagInline from core.admin_archiveresults import ArchiveResultInline, result_url -GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False} - +# GLOBAL_CONTEXT = {'VERSION': VERSION, 'VERSIONS_AVAILABLE': [], 'CAN_UPGRADE': False} +GLOBAL_CONTEXT = {} class SnapshotActionForm(ActionForm): diff --git a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py index 2a12f492..6883cdd1 100644 --- a/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py +++ b/archivebox/pkgs/abx-plugin-chrome/abx_plugin_chrome/config.py @@ -188,6 +188,10 @@ class ChromeConfig(BaseConfigSet): # cmd_args += ('--timeout={}'.format(options.CHROME_TIMEOUT * 1000),) if options.CHROME_USER_DATA_DIR: + # remove SingletonLock file + lockfile = options.CHROME_USER_DATA_DIR / options.CHROME_PROFILE_NAME / 'SingletonLock' + lockfile.unlink(missing_ok=True) + cmd_args.append('--user-data-dir={}'.format(options.CHROME_USER_DATA_DIR)) cmd_args.append('--profile-directory={}'.format(options.CHROME_PROFILE_NAME or 'Default')) diff --git a/archivebox/templates/admin/base.html b/archivebox/templates/admin/base.html index 8c97e10f..192cc323 100644 --- a/archivebox/templates/admin/base.html +++ b/archivebox/templates/admin/base.html @@ -148,6 +148,7 @@ {% block footer %}{% endblock %} + {% comment %} {% if user.is_authenticated and user.is_superuser and CAN_UPGRADE %} {% endif %} + {% endcomment %}