mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
Merge branch 'dev' into side-fixes
This commit is contained in:
commit
89bdda85e9
12 changed files with 255 additions and 1037 deletions
|
@ -16,6 +16,7 @@ venv/
|
|||
.docker-venv/
|
||||
node_modules/
|
||||
|
||||
docs/
|
||||
build/
|
||||
dist/
|
||||
brew_dist/
|
||||
|
|
19
.github/workflows/docker.yml
vendored
19
.github/workflows/docker.yml
vendored
|
@ -11,8 +11,7 @@ on:
|
|||
|
||||
env:
|
||||
DOCKER_IMAGE: archivebox-ci
|
||||
|
||||
|
||||
|
||||
jobs:
|
||||
buildx:
|
||||
runs-on: ubuntu-latest
|
||||
|
@ -60,13 +59,11 @@ jobs:
|
|||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: archivebox/archivebox,nikisweeting/archivebox
|
||||
flavor: |
|
||||
latest=auto
|
||||
tags: |
|
||||
type=ref,event=branch
|
||||
type=semver,pattern={{version}}
|
||||
type=semver,pattern={{major}}.{{minor}}
|
||||
type=sha
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
|
||||
- name: Build and push
|
||||
id: docker_build
|
||||
|
@ -78,8 +75,18 @@ jobs:
|
|||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: ${{ steps.docker_meta.outputs.tags }}
|
||||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache-new
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7
|
||||
|
||||
- name: Image digest
|
||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||
|
||||
# This ugly bit is necessary if you don't want your cache to grow forever
|
||||
# until it hits GitHub's limit of 5GB.
|
||||
# Temp fix
|
||||
# https://github.com/docker/build-push-action/issues/252
|
||||
# https://github.com/moby/buildkit/issues/1896
|
||||
- name: Move cache
|
||||
run: |
|
||||
rm -rf /tmp/.buildx-cache
|
||||
mv /tmp/.buildx-cache-new /tmp/.buildx-cache
|
||||
|
|
24
Dockerfile
24
Dockerfile
|
@ -73,7 +73,8 @@ COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
|
|||
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
|
||||
|
||||
# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
|
||||
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
|
||||
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
|
||||
&& rm -f /etc/apt/apt.conf.d/docker-clean
|
||||
|
||||
# Print debug info about build and save it to disk, for human eyes only, not used by anything else
|
||||
RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
|
||||
|
@ -123,7 +124,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
|||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
|
||||
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
|
||||
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
||||
&& curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
nodejs libatomic1 python3-minimal \
|
||||
|
@ -202,7 +203,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
|||
&& chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \
|
||||
# Save version info
|
||||
&& ( \
|
||||
which chromium-browser && /usr/bin/chromium-browser --version \
|
||||
which chromium-browser && /usr/bin/chromium-browser --version || /usr/lib/chromium/chromium --version \
|
||||
&& echo -e '\n\n' \
|
||||
) | tee -a /VERSION.txt
|
||||
|
||||
|
@ -246,15 +247,15 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
|||
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
|
||||
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
|
||||
echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
|
||||
&& apt-get update -qq \
|
||||
# && apt-get update -qq \
|
||||
# install C compiler to build deps on platforms that don't have 32-bit wheels available on PyPI
|
||||
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
build-essential \
|
||||
# && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||
# build-essential \
|
||||
# INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
|
||||
&& pip install -e "$CODE_DIR"[sonic,ldap] \
|
||||
# save docker image size and always remove compilers / build tools after building is complete
|
||||
&& apt-get purge -y build-essential \
|
||||
&& apt-get autoremove -y \
|
||||
# && apt-get purge -y build-essential \
|
||||
# && apt-get autoremove -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
####################################################
|
||||
|
@ -276,11 +277,10 @@ ENV IN_DOCKER=True
|
|||
|
||||
# Print version for nice docker finish summary
|
||||
RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
|
||||
&& echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
|
||||
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ}\n\n" \
|
||||
&& "$CODE_DIR/bin/docker_entrypoint.sh" \
|
||||
archivebox version 2>&1 \
|
||||
&& echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})\n" \
|
||||
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s")\n\n" \
|
||||
) | tee -a /VERSION.txt
|
||||
RUN "$CODE_DIR"/bin/docker_entrypoint.sh version 2>&1 | tee -a /VERSION.txt
|
||||
|
||||
####################################################
|
||||
|
||||
|
|
11
README.md
11
README.md
|
@ -1,5 +1,5 @@
|
|||
<div align="center">
|
||||
<em><img src="icon.png" height="90px"></em>
|
||||
<em><img src="https://archivebox.io/icon.png" height="90px"></em>
|
||||
<h1>ArchiveBox<br/><sub>Open-source self-hosted web archiving.</sub></h1>
|
||||
|
||||
<br/>
|
||||
|
@ -33,9 +33,9 @@ curl -sSL 'https://get.archivebox.io' | sh # (or see pip/brew/Docker instruct
|
|||
|
||||
**ArchiveBox is a powerful, self-hosted internet archiving solution to collect, save, and view websites offline.**
|
||||
|
||||
Without active preservation effort, everything on the internet eventually dissapears or degrades. Archive.org does a great job as a free central archive, but they require all archives to be public, and they cant save every type of content.
|
||||
Without active preservation effort, everything on the internet eventually disappears or degrades. Archive.org does a great job as a free central archive, but they require all archives to be public, and they can't save every type of content.
|
||||
|
||||
*ArchiveBox is an open source tool that helps you archive web content on your own (or privately within an organization): save sharable copies of browser bookmarks, preserve evidence for legal cases, backup photos on FB / Insta / Flickr, download your media from YT / Soundcloud / etc., snapshot research papers in academic citations, and more...*
|
||||
*ArchiveBox is an open source tool that helps you archive web content on your own (or privately within an organization): save copies of browser bookmarks, preserve evidence for legal cases, backup photos from FB / Insta / Flickr, download your media from YT / Soundcloud / etc., snapshot research papers & academic citations, and more...*
|
||||
|
||||
> ➡️ *Use ArchiveBox as a [command-line package](#quickstart) and/or [self-hosted web app](#quickstart) on Linux, macOS, or in [Docker](#quickstart).*
|
||||
|
||||
|
@ -320,6 +320,10 @@ See the <a href="https://github.com/ArchiveBox/pip-archivebox"><code>pip-archive
|
|||
<details>
|
||||
<summary><img src="https://user-images.githubusercontent.com/511499/118077361-f0616580-b381-11eb-973c-ee894a3349fb.png" alt="Arch" height="28px" align="top"/> <code>pacman</code> / <img src="https://user-images.githubusercontent.com/511499/118077946-29e6a080-b383-11eb-94f0-d4871da08c3f.png" alt="FreeBSD" height="28px" align="top"/> <code>pkg</code> / <img src="https://user-images.githubusercontent.com/511499/118077861-002d7980-b383-11eb-86a7-5936fad9190f.png" alt="Nix" height="28px" align="top"/> <code>nix</code> (Arch/FreeBSD/NixOS/more)</summary>
|
||||
<br/>
|
||||
|
||||
> [!WARNING]
|
||||
> *These are contributed by external volunteers and may lag behind the official `pip` channel.*
|
||||
|
||||
<ul>
|
||||
<li>Arch: <a href="https://aur.archlinux.org/packages/archivebox/"><code>yay -S archivebox</code></a> (contributed by <a href="https://github.com/imlonghao"><code>@imlonghao</code></a>)</li>
|
||||
<li>FreeBSD: <a href="https://github.com/ArchiveBox/ArchiveBox#%EF%B8%8F-easy-setup"><code>curl -sSL 'https://get.archivebox.io' | sh</code></a> (uses <code>pkg</code> + <code>pip3</code> under-the-hood)</li>
|
||||
|
@ -366,6 +370,7 @@ See <a href="#%EF%B8%8F-cli-usage">below</a> for usage examples using the CLI, W
|
|||
<br/>
|
||||
Other providers of paid ArchiveBox hosting (not officially endorsed):<br/>
|
||||
<br/><br/>
|
||||
<li><a href="https://elest.io/open-source/archivebox"><img src="https://img.shields.io/badge/Managed_Hosting-Elest.io-%23193f7e.svg?style=flat" height="22px"/></a></li>
|
||||
<li><a href="https://www.stellarhosted.com/archivebox/"><img src="https://img.shields.io/badge/Semi_Managed_Hosting-StellarHosted.com-%23193f7e.svg?style=flat" height="22px"/></a> (USD $29-250/mo, <a href="https://www.stellarhosted.com/archivebox/#pricing">pricing</a>)</li>
|
||||
<li><a href="https://www.pikapods.com/pods?run=archivebox"><img src="https://img.shields.io/badge/Semi_Managed_Hosting-PikaPods.com-%2343a047.svg?style=flat" height="22px"/></a> (from USD $2.6/mo)</li>
|
||||
<li><a href="https://m.do.co/c/cbc4c0c17840">
|
||||
|
|
|
@ -391,7 +391,7 @@ def get_version(config):
|
|||
|
||||
raise Exception('Failed to detect installed archivebox version!')
|
||||
|
||||
def get_commit_hash(config):
|
||||
def get_commit_hash(config) -> Optional[str]:
|
||||
try:
|
||||
git_dir = config['PACKAGE_DIR'] / '../'
|
||||
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
|
||||
|
@ -400,6 +400,14 @@ def get_commit_hash(config):
|
|||
except Exception:
|
||||
return None
|
||||
|
||||
def get_build_time(config) -> str:
    """Return the build time as 'YYYY-MM-DD HH:MM:SS <unix epoch seconds>'.

    When config['IN_DOCKER'] is truthy, the value is whatever follows the last
    'BUILD_END_TIME=' marker in /VERSION.txt, up to the end of that line.
    Otherwise (or when that file or marker is unavailable) the modification
    time of config['PACKAGE_DIR'] / 'config.py' is used instead.
    """
    if config['IN_DOCKER']:
        try:
            version_text = Path('/VERSION.txt').read_text()
        except OSError:
            # file missing/unreadable: fall through to the source mtime below
            version_text = ''

        # rpartition tells us whether the marker exists at all, so a file without
        # it no longer yields its (unrelated) first line as the build time
        _, marker, after_marker = version_text.rpartition('BUILD_END_TIME=')
        if marker:
            return after_marker.split('\n', 1)[0]

    src_last_modified_unix_timestamp = (config['PACKAGE_DIR'] / 'config.py').stat().st_mtime
    last_modified = datetime.fromtimestamp(src_last_modified_unix_timestamp)

    # '%s' is a platform-specific strftime extension that Python does not document,
    # so the epoch seconds are appended explicitly instead
    return f"{last_modified.strftime('%Y-%m-%d %H:%M:%S')} {int(src_last_modified_unix_timestamp)}"
|
||||
|
||||
############################## Derived Config ##################################
|
||||
|
||||
|
||||
|
@ -426,8 +434,9 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
|
|||
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
|
||||
|
||||
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
|
||||
'VERSION': {'default': lambda c: get_version(c)},
|
||||
'VERSION': {'default': lambda c: get_version(c).split('+', 1)[0]},
|
||||
'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)},
|
||||
'BUILD_TIME': {'default': lambda c: get_build_time(c)},
|
||||
|
||||
'PYTHON_BINARY': {'default': lambda c: sys.executable},
|
||||
'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
|
||||
|
@ -1113,14 +1122,25 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
|
|||
|
||||
def check_system_config(config: ConfigDict=CONFIG) -> None:
|
||||
### Check system environment
|
||||
if config['USER'] == 'root':
|
||||
if config['USER'] == 'root' or str(config['PUID']) == "0":
|
||||
stderr('[!] ArchiveBox should never be run as root!', color='red')
|
||||
stderr(' For more information, see the security overview documentation:')
|
||||
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
|
||||
|
||||
if config['IN_DOCKER']:
|
||||
attempted_command = ' '.join(sys.argv[:3])
|
||||
stderr('')
|
||||
stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
|
||||
stderr(f' docker compose run archivebox {attempted_command}')
|
||||
stderr(f' docker compose exec --user=archivebox archivebox {attempted_command}')
|
||||
stderr(' or')
|
||||
stderr(f' docker run -it -v ... -p ... archivebox/archivebox {attempted_command}')
|
||||
stderr(f' docker exec -it --user=archivebox <container id> /bin/bash')
|
||||
|
||||
raise SystemExit(2)
|
||||
|
||||
### Check Python environment
|
||||
if sys.version_info[:3] < (3, 6, 0):
|
||||
if sys.version_info[:3] < (3, 7, 0):
|
||||
stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
|
||||
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
|
||||
raise SystemExit(2)
|
||||
|
@ -1284,8 +1304,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
|||
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
|
||||
command = ' '.join(sys.argv)
|
||||
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
|
||||
f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
|
||||
|
||||
f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
|
||||
|
||||
if check_db:
|
||||
# Enable WAL mode in sqlite3
|
||||
|
|
|
@ -93,11 +93,14 @@ from .config import (
|
|||
SQL_INDEX_FILENAME,
|
||||
ALLOWED_IN_OUTPUT_DIR,
|
||||
SEARCH_BACKEND_ENGINE,
|
||||
LDAP,
|
||||
get_version,
|
||||
check_dependencies,
|
||||
check_data_folder,
|
||||
write_config_file,
|
||||
VERSION,
|
||||
COMMIT_HASH,
|
||||
BUILD_TIME,
|
||||
CODE_LOCATIONS,
|
||||
EXTERNAL_LOCATIONS,
|
||||
DATA_LOCATIONS,
|
||||
|
@ -218,31 +221,39 @@ def version(quiet: bool=False,
|
|||
|
||||
if not quiet:
|
||||
# 0.7.1
|
||||
# ArchiveBox v0.7.1 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
|
||||
# DEBUG=False IN_DOCKER=True IN_QEMU=False IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep
|
||||
# ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
|
||||
# IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
|
||||
# FS_ATOMIC=True FS_REMOTE=False FS_USER=501:20 FS_PERMS=644
|
||||
# DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
|
||||
|
||||
p = platform.uname()
|
||||
print(
|
||||
'ArchiveBox v{}'.format(VERSION),
|
||||
*((COMMIT_HASH[:7],) if COMMIT_HASH else ()),
|
||||
sys.implementation.name.title(),
|
||||
p.system,
|
||||
platform.platform(),
|
||||
p.machine,
|
||||
'ArchiveBox v{}'.format(get_version(CONFIG)),
|
||||
*((f'COMMIT_HASH={COMMIT_HASH[:7]}',) if COMMIT_HASH else ()),
|
||||
f'BUILD_TIME={BUILD_TIME}',
|
||||
)
|
||||
print(
|
||||
f'IN_DOCKER={IN_DOCKER}',
|
||||
f'IN_QEMU={IN_QEMU}',
|
||||
f'ARCH={p.machine}',
|
||||
f'OS={p.system}',
|
||||
f'PLATFORM={platform.platform()}',
|
||||
f'PYTHON={sys.implementation.name.title()}',
|
||||
)
|
||||
OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
|
||||
print(
|
||||
f'DEBUG={DEBUG}',
|
||||
f'IN_DOCKER={IN_DOCKER}',
|
||||
f'IN_QEMU={IN_QEMU}',
|
||||
f'IS_TTY={IS_TTY}',
|
||||
f'TZ={TIMEZONE}',
|
||||
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
|
||||
f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
|
||||
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
|
||||
f'FS_USER={PUID}:{PGID}',
|
||||
f'FS_PERMS={OUTPUT_PERMISSIONS}',
|
||||
)
|
||||
print(
|
||||
f'DEBUG={DEBUG}',
|
||||
f'IS_TTY={IS_TTY}',
|
||||
f'TZ={TIMEZONE}',
|
||||
f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
|
||||
f'LDAP={LDAP}',
|
||||
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
|
||||
)
|
||||
print()
|
||||
|
||||
|
@ -271,7 +282,7 @@ def version(quiet: bool=False,
|
|||
print(printable_folder_status(name, path))
|
||||
else:
|
||||
print()
|
||||
print('{white}[i] Data locations:{reset}'.format(**ANSI))
|
||||
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
|
||||
|
||||
print()
|
||||
check_dependencies()
|
||||
|
@ -1005,9 +1016,9 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
|
|||
|
||||
stderr('\n Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
|
||||
if not NODE_VERSION:
|
||||
stderr('[X] You must first install node using your system package manager', color='red')
|
||||
stderr('[X] You must first install node & npm using your system package manager', color='red')
|
||||
hint([
|
||||
'curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -',
|
||||
'https://github.com/nodesource/distributions#table-of-contents',
|
||||
'or to disable all node-based modules run: archivebox config --set USE_NODE=False',
|
||||
])
|
||||
raise SystemExit(1)
|
||||
|
|
|
@ -23,6 +23,7 @@ SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
|
|||
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
|
||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
||||
GIT_SHA=sha-"$(git rev-parse --short HEAD)"
|
||||
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
|
||||
|
||||
echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$SELECTED_PLATFORMS"
|
||||
|
@ -85,12 +86,16 @@ docker buildx build --platform "$SELECTED_PLATFORMS" --load . \
|
|||
-t archivebox/archivebox:$TAG_NAME \
|
||||
-t archivebox/archivebox:$VERSION \
|
||||
-t archivebox/archivebox:$SHORT_VERSION \
|
||||
-t archivebox/archivebox:$GIT_SHA \
|
||||
-t archivebox/archivebox:latest \
|
||||
-t nikisweeting/archivebox \
|
||||
-t nikisweeting/archivebox:$TAG_NAME \
|
||||
-t nikisweeting/archivebox:$VERSION \
|
||||
-t nikisweeting/archivebox:$SHORT_VERSION \
|
||||
-t nikisweeting/archivebox:$GIT_SHA \
|
||||
-t nikisweeting/archivebox:latest \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:latest
|
||||
|
|
|
@ -1,20 +1,55 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This Docker ENTRYPOINT script is called by `docker run archivebox ...` or `docker compose run archivebox ...`.
|
||||
# It takes a CMD as $* shell arguments and runs it following these setup steps:
|
||||
|
||||
# - Set the archivebox user to use the correct PUID & PGID
|
||||
# 1. highest precedence is for valid PUID and PGID env vars passed in explicitly
|
||||
# 2. fall back to DETECTED_PUID of files found within existing data dir
|
||||
# 3. fall back to DEFAULT_PUID if there is no data dir or it's owned by root
|
||||
# - Create a new /data dir if necessary and set the correct ownership on it
|
||||
# - Create a new /browsers dir if necessary and set the correct ownership on it
|
||||
# - Check whether we're running inside QEMU emulation and show a warning if so.
|
||||
# - Drop down to archivebox user permissions and execute passed CMD command.
|
||||
|
||||
# Bash Environment Setup
|
||||
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
|
||||
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
|
||||
# set -o xtrace
|
||||
# set -o nounset
|
||||
set -o errexit
|
||||
set -o errtrace
|
||||
set -o pipefail
|
||||
# IFS=$'\n'
|
||||
|
||||
# Load global invariants (set by Dockerfile during image build time, not intended to be customized by users at runtime)
|
||||
export DATA_DIR="${DATA_DIR:-/data}"
|
||||
export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
|
||||
|
||||
# default PUID and PGID if data dir is empty and no PUID+PGID is set
|
||||
# Global default PUID and PGID if data dir is empty and no intended PUID+PGID is set manually by user
|
||||
export DEFAULT_PUID=911
|
||||
export DEFAULT_PGID=911
|
||||
|
||||
# if data directory already exists, autodetect detect owner by looking at files within
|
||||
export DETECTED_UID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
|
||||
export DETECTED_GID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
|
||||
# If the user tries to set PUID and PGID to root values manually, catch and reject because root is not allowed
|
||||
if [[ "$PUID" == "0" ]] || [[ "$PGID" == "0" ]]; then
|
||||
echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr
|
||||
echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap all permissions, leave PUID/PGID blank" > /dev/stderr
|
||||
echo -e " or set PUID/PGID to the same value as the user/group they remap to (e.g. $DEFAULT_PUID:$DEFAULT_PGID)." > /dev/stderr
|
||||
echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr
|
||||
exit 3
|
||||
fi
|
||||
|
||||
# Set the archivebox user to use the configured UID & GID
|
||||
# prefers PUID and PGID env vars passsed in explicitly, falls back to autodetected defaults
|
||||
usermod -o -u "${PUID:-$DETECTED_UID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
||||
groupmod -o -g "${PGID:-$DETECTED_GID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
||||
# If data directory already exists, autodetect the owner by looking at files within
|
||||
export DETECTED_PUID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
|
||||
export DETECTED_PGID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
|
||||
|
||||
# If data directory exists but is owned by root, use defaults instead of root because root is not allowed
|
||||
[[ "$DETECTED_PUID" == "0" ]] && export DETECTED_PUID="$DEFAULT_PUID"
|
||||
[[ "$DETECTED_PGID" == "0" ]] && export DETECTED_PGID="$DEFAULT_PGID"
|
||||
|
||||
# Set archivebox user and group ids to desired PUID/PGID
|
||||
usermod -o -u "${PUID:-$DETECTED_PUID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
||||
groupmod -o -g "${PGID:-$DETECTED_PGID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
|
||||
|
||||
# re-set PUID and PGID to values reported by system instead of values we tried to set,
|
||||
# in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us
|
||||
|
@ -29,12 +64,12 @@ if [[ -d "$DATA_DIR/archive" ]]; then
|
|||
# echo "[√] Permissions are correct"
|
||||
else
|
||||
# the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
|
||||
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir." >&2
|
||||
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
|
||||
echo -e " \$ chown -R $PUID:$PGID ./data\n" >&2
|
||||
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" >&2
|
||||
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" >&2
|
||||
exit 1
|
||||
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir (currently owned by $(stat -c '%u' "$DATA_DIR"):$(stat -c '%g' "$DATA_DIR")." >&2
|
||||
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr
|
||||
echo -e " \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr
|
||||
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr
|
||||
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr
|
||||
exit 3
|
||||
fi
|
||||
else
|
||||
# create data directory
|
||||
|
@ -46,29 +81,35 @@ fi
|
|||
chown $PUID:$PGID "$DATA_DIR"
|
||||
chown $PUID:$PGID "$DATA_DIR"/*
|
||||
|
||||
# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome
|
||||
# also chown BROWSERS_DIR because otherwise 'archivebox setup' won't be able to install chrome at runtime
|
||||
PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
|
||||
mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
|
||||
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
|
||||
chown $PUID:$PGID "${PLAYWRIGHT_BROWSERS_PATH}/*"
|
||||
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/*
|
||||
rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
|
||||
|
||||
|
||||
# (this check is written in blood, QEMU silently breaks things in ways that are not obvious)
|
||||
export IN_QEMU="$(pmap 1 | grep qemu | wc -l | grep -E '^0$' >/dev/null && echo 'False' || echo 'True')"
|
||||
if [[ "$IN_QEMU" == 'True' ]]; then
|
||||
echo -e "\n[!] Warning: Running $(uname -m) emulated container in QEMU, some things will break!" >&2
|
||||
echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." >&2
|
||||
echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" >&2
|
||||
export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')"
|
||||
if [[ "$IN_QEMU" == "True" ]]; then
|
||||
echo -e "\n[!] Warning: Running $(uname -m) docker image using QEMU emulation, some things will break!" > /dev/stderr
|
||||
echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." > /dev/stderr
|
||||
echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" > /dev/stderr
|
||||
fi
|
||||
|
||||
|
||||
# Drop permissions to run commands as the archivebox user
|
||||
if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then
|
||||
# handle "docker run archivebox /some/non-archivebox/command --with=some args" by passing args directly to bash -c
|
||||
# e.g. "docker run archivebox /venv/bin/archivebox-alt init"
|
||||
# e.g. "docker run archivebox archivebox init"
|
||||
# "docker run archivebox /venv/bin/archivebox-alt init"
|
||||
# "docker run archivebox /bin/bash -c '...'"
|
||||
# "docker run archivebox echo test"
|
||||
# "docker run archivebox cat /VERSION.txt"
|
||||
exec gosu "$PUID" bash -c "$*"
|
||||
else
|
||||
# handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
|
||||
# e.g. "docker run archivebox add --depth=1 https://example.com"
|
||||
# e.g. "docker run archivebox help"
|
||||
# "docker run archivebox add --depth=1 https://example.com"
|
||||
# "docker run archivebox manage createsupseruser"
|
||||
# "docker run archivebox server 0.0.0.0:8000"
|
||||
exec gosu "$PUID" bash -c "archivebox $*"
|
||||
|
|
|
@ -18,6 +18,7 @@ SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
|
|||
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
|
||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
||||
GIT_SHA=sha-"$(git rev-parse --short HEAD)"
|
||||
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
|
||||
|
||||
|
||||
|
@ -34,12 +35,16 @@ docker buildx build --platform "$SELECTED_PLATFORMS" --push . \
|
|||
-t archivebox/archivebox:$TAG_NAME \
|
||||
-t archivebox/archivebox:$VERSION \
|
||||
-t archivebox/archivebox:$SHORT_VERSION \
|
||||
-t archivebox/archivebox:$GIT_SHA \
|
||||
-t archivebox/archivebox:latest \
|
||||
-t nikisweeting/archivebox \
|
||||
-t nikisweeting/archivebox:$TAG_NAME \
|
||||
-t nikisweeting/archivebox:$VERSION \
|
||||
-t nikisweeting/archivebox:$SHORT_VERSION \
|
||||
-t nikisweeting/archivebox:$GIT_SHA \
|
||||
-t nikisweeting/archivebox:latest \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
|
||||
-t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA
|
||||
|
||||
|
|
|
@ -3,10 +3,9 @@ name = "archivebox"
|
|||
version = "0.7.1"
|
||||
description = "Self-hosted internet archiving solution."
|
||||
authors = [
|
||||
{name = "Nick Sweeting", email = "setup.py@archivebox.io"},
|
||||
{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"},
|
||||
]
|
||||
dependencies = [
|
||||
# "setuptools>=68.2.2",
|
||||
"croniter>=0.3.34",
|
||||
"dateparser>=1.0.0",
|
||||
"django-extensions>=3.0.3",
|
||||
|
@ -16,8 +15,7 @@ dependencies = [
|
|||
"python-crontab>=2.5.1",
|
||||
"requests>=2.24.0",
|
||||
"w3lib>=1.22.0",
|
||||
# "youtube-dl>=2021.04.17",
|
||||
"yt-dlp>=2021.4.11",
|
||||
"yt-dlp>=2023.10.13",
|
||||
# "playwright>=1.39.0; platform_machine != 'armv7l'",
|
||||
]
|
||||
requires-python = ">=3.9"
|
||||
|
@ -41,6 +39,9 @@ classifiers = [
|
|||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
"Programming Language :: Python :: 3.11",
|
||||
"Programming Language :: Python :: 3.12",
|
||||
"Topic :: Internet :: WWW/HTTP",
|
||||
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
|
||||
"Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
|
||||
|
@ -56,34 +57,26 @@ classifiers = [
|
|||
# pdm lock -G:all
|
||||
# pdm install -G:all
|
||||
[tool.pdm.dev-dependencies]
|
||||
build = [
|
||||
"setuptools",
|
||||
dev = [
|
||||
# build
|
||||
"setuptools>=68.2.2",
|
||||
"wheel",
|
||||
"pdm",
|
||||
# "bottle",
|
||||
# "stdeb",
|
||||
# "twine",
|
||||
]
|
||||
lint = [
|
||||
"flake8",
|
||||
"mypy",
|
||||
"django-stubs",
|
||||
]
|
||||
test = [
|
||||
"pytest",
|
||||
]
|
||||
debug = [
|
||||
"django-debug-toolbar",
|
||||
"djdt_flamegraph",
|
||||
"ipdb",
|
||||
]
|
||||
doc = [
|
||||
"homebrew-pypi-poet>=0.10.0",
|
||||
# docs
|
||||
"recommonmark",
|
||||
"sphinx",
|
||||
"sphinx-rtd-theme",
|
||||
]
|
||||
dev = [
|
||||
"homebrew-pypi-poet>=0.10.0",
|
||||
# debug
|
||||
"django-debug-toolbar",
|
||||
"djdt_flamegraph",
|
||||
"ipdb",
|
||||
# test
|
||||
"pytest",
|
||||
# lint
|
||||
"flake8",
|
||||
"mypy",
|
||||
"django-stubs",
|
||||
]
|
||||
|
||||
[tool.pdm.scripts]
|
||||
|
|
|
@ -1,35 +1,41 @@
|
|||
# This file is @generated by PDM.
|
||||
# Please do not edit it manually.
|
||||
|
||||
appnope==0.1.3
|
||||
asgiref==3.7.2
|
||||
asttokens==2.4.1
|
||||
brotli==1.1.0
|
||||
certifi==2023.7.22
|
||||
brotli==1.1.0; implementation_name == "cpython"
|
||||
brotlicffi==1.1.0.0; implementation_name != "cpython"
|
||||
certifi==2023.11.17
|
||||
cffi==1.16.0; implementation_name != "cpython"
|
||||
charset-normalizer==3.3.2
|
||||
colorama==0.4.6; sys_platform == "win32"
|
||||
croniter==2.0.1
|
||||
dateparser==1.1.8
|
||||
dateparser==1.2.0
|
||||
decorator==5.1.1
|
||||
django==3.1.14
|
||||
django-auth-ldap==4.1.0
|
||||
django-extensions==3.1.5
|
||||
exceptiongroup==1.2.0; python_version < "3.11"
|
||||
executing==2.0.1
|
||||
idna==3.4
|
||||
ipython==8.17.2
|
||||
idna==3.6
|
||||
ipython==8.18.1
|
||||
jedi==0.19.1
|
||||
matplotlib-inline==0.1.6
|
||||
mutagen==1.47.0
|
||||
mypy-extensions==1.0.0
|
||||
parso==0.8.3
|
||||
pexpect==4.8.0
|
||||
prompt-toolkit==3.0.40
|
||||
ptyprocess==0.7.0
|
||||
pexpect==4.9.0; sys_platform != "win32"
|
||||
prompt-toolkit==3.0.43
|
||||
ptyprocess==0.7.0; sys_platform != "win32"
|
||||
pure-eval==0.2.2
|
||||
pyasn1==0.5.0
|
||||
pyasn1==0.5.1
|
||||
pyasn1-modules==0.3.0
|
||||
pycparser==2.21; implementation_name != "cpython"
|
||||
pycryptodomex==3.19.0
|
||||
pygments==2.16.1
|
||||
pygments==2.17.2
|
||||
python-crontab==3.0.0
|
||||
python-dateutil==2.8.2
|
||||
python-ldap==3.4.4
|
||||
pytz==2023.3.post1
|
||||
regex==2023.10.3
|
||||
requests==2.31.0
|
||||
|
@ -37,10 +43,12 @@ six==1.16.0
|
|||
sonic-client==1.0.0
|
||||
sqlparse==0.4.4
|
||||
stack-data==0.6.3
|
||||
traitlets==5.13.0
|
||||
traitlets==5.14.0
|
||||
typing-extensions==4.9.0; python_version < "3.11"
|
||||
tzdata==2023.3; platform_system == "Windows"
|
||||
tzlocal==5.2
|
||||
urllib3==2.1.0
|
||||
w3lib==2.1.2
|
||||
wcwidth==0.2.10
|
||||
wcwidth==0.2.12
|
||||
websockets==12.0
|
||||
yt-dlp==2023.10.13
|
||||
yt-dlp==2023.11.16
|
||||
|
|
Loading…
Reference in a new issue