Merge branch 'dev' into side-fixes

This commit is contained in:
Nick Sweeting 2023-12-18 19:17:31 -08:00 committed by GitHub
commit 89bdda85e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 255 additions and 1037 deletions

View file

@ -16,6 +16,7 @@ venv/
.docker-venv/
node_modules/
docs/
build/
dist/
brew_dist/

View file

@ -11,8 +11,7 @@ on:
env:
DOCKER_IMAGE: archivebox-ci
jobs:
buildx:
runs-on: ubuntu-latest
@ -60,13 +59,11 @@ jobs:
uses: docker/metadata-action@v5
with:
images: archivebox/archivebox,nikisweeting/archivebox
flavor: |
latest=auto
tags: |
type=ref,event=branch
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=sha
type=raw,value=latest,enable={{is_default_branch}}
- name: Build and push
id: docker_build
@ -78,8 +75,18 @@ jobs:
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.docker_meta.outputs.tags }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache-new
platforms: linux/amd64,linux/arm64,linux/arm/v7
- name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}
# This ugly bit is necessary if you don't want your cache to grow forever
# until it hits GitHub's limit of 5GB.
# Temp fix
# https://github.com/docker/build-push-action/issues/252
# https://github.com/moby/buildkit/issues/1896
- name: Move cache
run: |
rm -rf /tmp/.buildx-cache
mv /tmp/.buildx-cache-new /tmp/.buildx-cache

View file

@ -73,7 +73,8 @@ COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
# Force apt to leave downloaded binaries in /var/cache/apt (massively speeds up Docker builds)
RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
RUN echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
&& rm -f /etc/apt/apt.conf.d/docker-clean
# Print debug info about build and save it to disk, for human eyes only, not used by anything else
RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
@ -123,7 +124,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.npm,sharing=locked,id=npm-$TARGETARCH$TARGETVARIANT \
echo "[+] Installing Node $NODE_VERSION environment in $NODE_MODULES..." \
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& curl -fsSL "https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key" | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
nodejs libatomic1 python3-minimal \
@ -202,7 +203,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
&& chown -R $ARCHIVEBOX_USER "$PLAYWRIGHT_BROWSERS_PATH" \
# Save version info
&& ( \
which chromium-browser && /usr/bin/chromium-browser --version \
which chromium-browser && /usr/bin/chromium-browser --version || /usr/lib/chromium/chromium --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
@ -246,15 +247,15 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT --mount=type=cache,target=/root/.cache/pip,sharing=locked,id=pip-$TARGETARCH$TARGETVARIANT \
echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
&& apt-get update -qq \
# && apt-get update -qq \
# install C compiler to build deps on platforms that don't have 32-bit wheels available on pypi
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
build-essential \
# && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
# build-essential \
# INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
&& pip install -e "$CODE_DIR"[sonic,ldap] \
# save docker image size and always remove compilers / build tools after building is complete
&& apt-get purge -y build-essential \
&& apt-get autoremove -y \
# && apt-get purge -y build-essential \
# && apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
####################################################
@ -276,11 +277,10 @@ ENV IN_DOCKER=True
# Print version for nice docker finish summary
RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
&& echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ}\n\n" \
&& "$CODE_DIR/bin/docker_entrypoint.sh" \
archivebox version 2>&1 \
&& echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})\n" \
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s")\n\n" \
) | tee -a /VERSION.txt
RUN "$CODE_DIR"/bin/docker_entrypoint.sh version 2>&1 | tee -a /VERSION.txt
####################################################

View file

@ -1,5 +1,5 @@
<div align="center">
<em><img src="icon.png" height="90px"></em>
<em><img src="https://archivebox.io/icon.png" height="90px"></em>
<h1>ArchiveBox<br/><sub>Open-source self-hosted web archiving.</sub></h1>
<br/>
@ -33,9 +33,9 @@ curl -sSL 'https://get.archivebox.io' | sh # (or see pip/brew/Docker instruct
**ArchiveBox is a powerful, self-hosted internet archiving solution to collect, save, and view websites offline.**
Without active preservation effort, everything on the internet eventually dissapears or degrades. Archive.org does a great job as a free central archive, but they require all archives to be public, and they cant save every type of content.
Without active preservation effort, everything on the internet eventually disappears or degrades. Archive.org does a great job as a free central archive, but they require all archives to be public, and they can't save every type of content.
*ArchiveBox is an open source tool that helps you archive web content on your own (or privately within an organization): save sharable copies of browser bookmarks, preserve evidence for legal cases, backup photos on FB / Insta / Flickr, download your media from YT / Soundcloud / etc., snapshot research papers in academic citations, and more...*
*ArchiveBox is an open source tool that helps you archive web content on your own (or privately within an organization): save copies of browser bookmarks, preserve evidence for legal cases, backup photos from FB / Insta / Flickr, download your media from YT / Soundcloud / etc., snapshot research papers & academic citations, and more...*
> ➡️ *Use ArchiveBox as a [command-line package](#quickstart) and/or [self-hosted web app](#quickstart) on Linux, macOS, or in [Docker](#quickstart).*
@ -320,6 +320,10 @@ See the <a href="https://github.com/ArchiveBox/pip-archivebox"><code>pip-archive
<details>
<summary><img src="https://user-images.githubusercontent.com/511499/118077361-f0616580-b381-11eb-973c-ee894a3349fb.png" alt="Arch" height="28px" align="top"/> <code>pacman</code> / <img src="https://user-images.githubusercontent.com/511499/118077946-29e6a080-b383-11eb-94f0-d4871da08c3f.png" alt="FreeBSD" height="28px" align="top"/> <code>pkg</code> / <img src="https://user-images.githubusercontent.com/511499/118077861-002d7980-b383-11eb-86a7-5936fad9190f.png" alt="Nix" height="28px" align="top"/> <code>nix</code> (Arch/FreeBSD/NixOS/more)</summary>
<br/>
> [!WARNING]
> *These are contributed by external volunteers and may lag behind the official `pip` channel.*
<ul>
<li>Arch: <a href="https://aur.archlinux.org/packages/archivebox/"><code>yay -S archivebox</code></a> (contributed by <a href="https://github.com/imlonghao"><code>@imlonghao</code></a>)</li>
<li>FreeBSD: <a href="https://github.com/ArchiveBox/ArchiveBox#%EF%B8%8F-easy-setup"><code>curl -sSL 'https://get.archivebox.io' | sh</code></a> (uses <code>pkg</code> + <code>pip3</code> under-the-hood)</li>
@ -366,6 +370,7 @@ See <a href="#%EF%B8%8F-cli-usage">below</a> for usage examples using the CLI, W
<br/>
Other providers of paid ArchiveBox hosting (not officially endorsed):<br/>
<br/><br/>
<li><a href="https://elest.io/open-source/archivebox"><img src="https://img.shields.io/badge/Managed_Hosting-Elest.io-%23193f7e.svg?style=flat" height="22px"/></a></li>
<li><a href="https://www.stellarhosted.com/archivebox/"><img src="https://img.shields.io/badge/Semi_Managed_Hosting-StellarHosted.com-%23193f7e.svg?style=flat" height="22px"/></a> (USD $29-250/mo, <a href="https://www.stellarhosted.com/archivebox/#pricing">pricing</a>)</li>
<li><a href="https://www.pikapods.com/pods?run=archivebox"><img src="https://img.shields.io/badge/Semi_Managed_Hosting-PikaPods.com-%2343a047.svg?style=flat" height="22px"/></a> (from USD $2.6/mo)</li>
<li><a href="https://m.do.co/c/cbc4c0c17840">

View file

@ -391,7 +391,7 @@ def get_version(config):
raise Exception('Failed to detect installed archivebox version!')
def get_commit_hash(config):
def get_commit_hash(config) -> Optional[str]:
try:
git_dir = config['PACKAGE_DIR'] / '../'
ref = (git_dir / 'HEAD').read_text().strip().split(' ')[-1]
@ -400,6 +400,14 @@ def get_commit_hash(config):
except Exception:
return None
def get_build_time(config) -> str:
    """Return a build timestamp formatted as 'YYYY-MM-DD HH:MM:SS <unix epoch seconds>'.

    Args:
        config: mapping that provides 'IN_DOCKER' (truthy when running inside
            the Docker image) and 'PACKAGE_DIR' (a Path to the package source).

    Returns:
        Inside Docker: the text following the last 'BUILD_END_TIME=' marker in
        /VERSION.txt, up to the end of that line.
        Otherwise: the last-modified time of PACKAGE_DIR/config.py, rendered in
        local time followed by the integer unix timestamp.

    Raises:
        OSError: if /VERSION.txt (in Docker) or config.py cannot be read/stat'ed.
    """
    if config['IN_DOCKER']:
        # the image build appends a BUILD_END_TIME=... line to /VERSION.txt;
        # take the last occurrence in case the marker appears more than once
        docker_build_end_time = Path('/VERSION.txt').read_text().rsplit('BUILD_END_TIME=')[-1].split('\n', 1)[0]
        return docker_build_end_time

    # truncate to whole seconds so the formatted date and the epoch value always agree
    src_last_modified_unix_timestamp = int((config['PACKAGE_DIR'] / 'config.py').stat().st_mtime)
    src_last_modified = datetime.fromtimestamp(src_last_modified_unix_timestamp)

    # '%s' is a non-standard strftime extension that is missing on some platforms
    # (e.g. Windows), so the epoch seconds are appended explicitly instead
    return f"{src_last_modified.strftime('%Y-%m-%d %H:%M:%S')} {src_last_modified_unix_timestamp}"
############################## Derived Config ##################################
@ -426,8 +434,9 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'DIR_OUTPUT_PERMISSIONS': {'default': lambda c: c['OUTPUT_PERMISSIONS'].replace('6', '7').replace('4', '5')},
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0] or bin_path('archivebox')},
'VERSION': {'default': lambda c: get_version(c)},
'VERSION': {'default': lambda c: get_version(c).split('+', 1)[0]},
'COMMIT_HASH': {'default': lambda c: get_commit_hash(c)},
'BUILD_TIME': {'default': lambda c: get_build_time(c)},
'PYTHON_BINARY': {'default': lambda c: sys.executable},
'PYTHON_ENCODING': {'default': lambda c: sys.stdout.encoding.upper()},
@ -1113,14 +1122,25 @@ if not CONFIG['CHECK_SSL_VALIDITY']:
def check_system_config(config: ConfigDict=CONFIG) -> None:
### Check system environment
if config['USER'] == 'root':
if config['USER'] == 'root' or str(config['PUID']) == "0":
stderr('[!] ArchiveBox should never be run as root!', color='red')
stderr(' For more information, see the security overview documentation:')
stderr(' https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview#do-not-run-as-root')
if config['IN_DOCKER']:
attempted_command = ' '.join(sys.argv[:3])
stderr('')
stderr(' {lightred}Hint{reset}: When using Docker, you must run commands with {green}docker run{reset} instead of {lightyellow}docker exec{reset}, e.g.:'.format(**config['ANSI']))
stderr(f' docker compose run archivebox {attempted_command}')
stderr(f' docker compose exec --user=archivebox archivebox {attempted_command}')
stderr(' or')
stderr(f' docker run -it -v ... -p ... archivebox/archivebox {attempted_command}')
stderr(f' docker exec -it --user=archivebox <container id> /bin/bash')
raise SystemExit(2)
### Check Python environment
if sys.version_info[:3] < (3, 6, 0):
if sys.version_info[:3] < (3, 7, 0):
stderr(f'[X] Python version is not new enough: {config["PYTHON_VERSION"]} (>3.6 is required)', color='red')
stderr(' See https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting#python for help upgrading your Python installation.')
raise SystemExit(2)
@ -1284,8 +1304,7 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
with open(settings.ERROR_LOG, "a", encoding='utf-8') as f:
command = ' '.join(sys.argv)
ts = datetime.now(timezone.utc).strftime('%Y-%m-%d__%H:%M:%S')
f.write(f"\n> {command}; ts={ts} version={config['VERSION']} docker={config['IN_DOCKER']} is_tty={config['IS_TTY']}\n")
f.write(f"\n> {command}; TS={ts} VERSION={config['VERSION']} IN_DOCKER={config['IN_DOCKER']} IS_TTY={config['IS_TTY']}\n")
if check_db:
# Enable WAL mode in sqlite3

View file

@ -93,11 +93,14 @@ from .config import (
SQL_INDEX_FILENAME,
ALLOWED_IN_OUTPUT_DIR,
SEARCH_BACKEND_ENGINE,
LDAP,
get_version,
check_dependencies,
check_data_folder,
write_config_file,
VERSION,
COMMIT_HASH,
BUILD_TIME,
CODE_LOCATIONS,
EXTERNAL_LOCATIONS,
DATA_LOCATIONS,
@ -218,31 +221,39 @@ def version(quiet: bool=False,
if not quiet:
# 0.7.1
# ArchiveBox v0.7.1 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
# DEBUG=False IN_DOCKER=True IN_QEMU=False IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep
# ArchiveBox v0.7.1+editable COMMIT_HASH=951bba5 BUILD_TIME=2023-12-17 16:46:05 1702860365
# IN_DOCKER=False IN_QEMU=False ARCH=arm64 OS=Darwin PLATFORM=macOS-14.2-arm64-arm-64bit PYTHON=Cpython
# FS_ATOMIC=True FS_REMOTE=False FS_USER=501:20 FS_PERMS=644
# DEBUG=False IS_TTY=True TZ=UTC SEARCH_BACKEND=ripgrep LDAP=False
p = platform.uname()
print(
'ArchiveBox v{}'.format(VERSION),
*((COMMIT_HASH[:7],) if COMMIT_HASH else ()),
sys.implementation.name.title(),
p.system,
platform.platform(),
p.machine,
'ArchiveBox v{}'.format(get_version(CONFIG)),
*((f'COMMIT_HASH={COMMIT_HASH[:7]}',) if COMMIT_HASH else ()),
f'BUILD_TIME={BUILD_TIME}',
)
print(
f'IN_DOCKER={IN_DOCKER}',
f'IN_QEMU={IN_QEMU}',
f'ARCH={p.machine}',
f'OS={p.system}',
f'PLATFORM={platform.platform()}',
f'PYTHON={sys.implementation.name.title()}',
)
OUTPUT_IS_REMOTE_FS = DATA_LOCATIONS['OUTPUT_DIR']['is_mount'] or DATA_LOCATIONS['ARCHIVE_DIR']['is_mount']
print(
f'DEBUG={DEBUG}',
f'IN_DOCKER={IN_DOCKER}',
f'IN_QEMU={IN_QEMU}',
f'IS_TTY={IS_TTY}',
f'TZ={TIMEZONE}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
f'FS_USER={PUID}:{PGID}',
f'FS_PERMS={OUTPUT_PERMISSIONS}',
)
print(
f'DEBUG={DEBUG}',
f'IS_TTY={IS_TTY}',
f'TZ={TIMEZONE}',
f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
f'LDAP={LDAP}',
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
)
print()
@ -271,7 +282,7 @@ def version(quiet: bool=False,
print(printable_folder_status(name, path))
else:
print()
print('{white}[i] Data locations:{reset}'.format(**ANSI))
print('{white}[i] Data locations:{reset} (not in a data directory)'.format(**ANSI))
print()
check_dependencies()
@ -1005,9 +1016,9 @@ def setup(out_dir: Path=OUTPUT_DIR) -> None:
stderr('\n Installing SINGLEFILE_BINARY, READABILITY_BINARY, MERCURY_BINARY automatically using npm...')
if not NODE_VERSION:
stderr('[X] You must first install node using your system package manager', color='red')
stderr('[X] You must first install node & npm using your system package manager', color='red')
hint([
'curl -sL https://deb.nodesource.com/setup_15.x | sudo -E bash -',
'https://github.com/nodesource/distributions#table-of-contents',
'or to disable all node-based modules run: archivebox config --set USE_NODE=False',
])
raise SystemExit(1)

View file

@ -23,6 +23,7 @@ SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
GIT_SHA=sha-"$(git rev-parse --short HEAD)"
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
echo "[+] Building Docker image: tag=$TAG_NAME version=$SHORT_VERSION arch=$SELECTED_PLATFORMS"
@ -85,12 +86,16 @@ docker buildx build --platform "$SELECTED_PLATFORMS" --load . \
-t archivebox/archivebox:$TAG_NAME \
-t archivebox/archivebox:$VERSION \
-t archivebox/archivebox:$SHORT_VERSION \
-t archivebox/archivebox:$GIT_SHA \
-t archivebox/archivebox:latest \
-t nikisweeting/archivebox \
-t nikisweeting/archivebox:$TAG_NAME \
-t nikisweeting/archivebox:$VERSION \
-t nikisweeting/archivebox:$SHORT_VERSION \
-t nikisweeting/archivebox:$GIT_SHA \
-t nikisweeting/archivebox:latest \
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
-t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA \
-t ghcr.io/archivebox/archivebox/archivebox:latest

View file

@ -1,20 +1,55 @@
#!/bin/bash
# This Docker ENTRYPOINT script is called by `docker run archivebox ...` or `docker compose run archivebox ...`.
# It takes a CMD as $* shell arguments and runs it following these setup steps:
# - Set the archivebox user to use the correct PUID & PGID
# 1. highest precedence is for valid PUID and PGID env vars passed in explicitly
# 2. fall back to DETECTED_PUID of files found within existing data dir
# 3. fall back to DEFAULT_PUID if no data dir or it's owned by root
# - Create a new /data dir if necessary and set the correct ownership on it
# - Create a new /browsers dir if necessary and set the correct ownership on it
# - Check whether we're running inside QEMU emulation and show a warning if so.
# - Drop down to archivebox user permissions and execute passed CMD command.
# Bash Environment Setup
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
# set -o xtrace
# set -o nounset
set -o errexit
set -o errtrace
set -o pipefail
# IFS=$'\n'
# Load global invariants (set by Dockerfile during image build time, not intended to be customized by users at runtime)
export DATA_DIR="${DATA_DIR:-/data}"
export ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
# default PUID and PGID if data dir is empty and no PUID+PGID is set
# Global default PUID and PGID if data dir is empty and no intended PUID+PGID is set manually by user
export DEFAULT_PUID=911
export DEFAULT_PGID=911
# if data directory already exists, autodetect detect owner by looking at files within
export DETECTED_UID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
export DETECTED_GID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
# If user tries to set PUID and PGID to root values manually, catch and reject because root is not allowed
if [[ "$PUID" == "0" ]] || [[ "$PGID" == "0" ]]; then
echo -e "\n[X] Error: Got PUID=$PUID and PGID=$PGID but ArchiveBox is not allowed to be run as root, please change or unset PUID & PGID and try again." > /dev/stderr
echo -e " Hint: some NFS/SMB/FUSE/etc. filesystems force-remap all permissions, leave PUID/PGID blank" > /dev/stderr
echo -e " or set PUID/PGID to the same value as the user/group they remap to (e.g. $DEFAULT_PUID:$DEFAULT_PGID)." > /dev/stderr
echo -e " https://linux.die.net/man/8/mount.cifs#:~:text=does%20not%20provide%20unix%20ownership" > /dev/stderr
exit 3
fi
# Set the archivebox user to use the configured UID & GID
# prefers PUID and PGID env vars passsed in explicitly, falls back to autodetected defaults
usermod -o -u "${PUID:-$DETECTED_UID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
groupmod -o -g "${PGID:-$DETECTED_GID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
# If data directory already exists, autodetect owner by looking at files within
export DETECTED_PUID="$(stat -c '%u' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PUID")"
export DETECTED_PGID="$(stat -c '%g' "$DATA_DIR/logs/errors.log" 2>/dev/null || echo "$DEFAULT_PGID")"
# If data directory exists but is owned by root, use defaults instead of root because root is not allowed
[[ "$DETECTED_PUID" == "0" ]] && export DETECTED_PUID="$DEFAULT_PUID"
[[ "$DETECTED_PGID" == "0" ]] && export DETECTED_PGID="$DEFAULT_PGID"
# Set archivebox user and group ids to desired PUID/PGID
usermod -o -u "${PUID:-$DETECTED_PUID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
groupmod -o -g "${PGID:-$DETECTED_PGID}" "$ARCHIVEBOX_USER" > /dev/null 2>&1
# re-set PUID and PGID to values reported by system instead of values we tried to set,
# in case wonky filesystems or Docker setups try to play UID/GID remapping tricks on us
@ -29,12 +64,12 @@ if [[ -d "$DATA_DIR/archive" ]]; then
# echo "[√] Permissions are correct"
else
# the only time this fails is if the host filesystem doesn't allow us to write as root (e.g. some NFS mapall/maproot problems, connection issues, drive disappeared, etc.)
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir." >&2
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
echo -e " \$ chown -R $PUID:$PGID ./data\n" >&2
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" >&2
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" >&2
exit 1
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir (currently owned by $(stat -c '%u' "$DATA_DIR"):$(stat -c '%g' "$DATA_DIR")." >&2
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:" > /dev/stderr
echo -e " \$ chown -R $PUID:$PGID ./data\n" > /dev/stderr
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" > /dev/stderr
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" > /dev/stderr
exit 3
fi
else
# create data directory
@ -46,29 +81,35 @@ fi
chown $PUID:$PGID "$DATA_DIR"
chown $PUID:$PGID "$DATA_DIR"/*
# also chown BROWSERS_DIR because otherwise 'archivebox setup' wont be able to install chrome
# also chown BROWSERS_DIR because otherwise 'archivebox setup' won't be able to install chrome at runtime
PLAYWRIGHT_BROWSERS_PATH="${PLAYWRIGHT_BROWSERS_PATH:-/browsers}"
mkdir -p "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"
chown $PUID:$PGID "${PLAYWRIGHT_BROWSERS_PATH}/*"
chown $PUID:$PGID "$PLAYWRIGHT_BROWSERS_PATH"/*
rm -Rf "$PLAYWRIGHT_BROWSERS_PATH/permissions_test_safe_to_delete"
# (this check is written in blood, QEMU silently breaks things in ways that are not obvious)
export IN_QEMU="$(pmap 1 | grep qemu | wc -l | grep -E '^0$' >/dev/null && echo 'False' || echo 'True')"
if [[ "$IN_QEMU" == 'True' ]]; then
echo -e "\n[!] Warning: Running $(uname -m) emulated container in QEMU, some things will break!" >&2
echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." >&2
echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" >&2
export IN_QEMU="$(pmap 1 | grep qemu >/dev/null && echo 'True' || echo 'False')"
if [[ "$IN_QEMU" == "True" ]]; then
echo -e "\n[!] Warning: Running $(uname -m) docker image using QEMU emulation, some things will break!" > /dev/stderr
echo -e " chromium (screenshot, pdf, dom), singlefile, and any dependencies that rely on inotify will not run in QEMU." > /dev/stderr
echo -e " See here for more info: https://github.com/microsoft/playwright/issues/17395#issuecomment-1250830493\n" > /dev/stderr
fi
# Drop permissions to run commands as the archivebox user
if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then
# handle "docker run archivebox /some/non-archivebox/command --with=some args" by passing args directly to bash -c
# e.g. "docker run archivebox /venv/bin/archivebox-alt init"
# e.g. "docker run archivebox archivebox init"
# "docker run archivebox /venv/bin/archivebox-alt init"
# "docker run archivebox /bin/bash -c '...'"
# "docker run archivebox echo test"
# "docker run archivebox cat /VERSION.txt"
exec gosu "$PUID" bash -c "$*"
else
# handle "docker run archivebox add some subcommand --with=args abc" by calling archivebox to run as args as CLI subcommand
# e.g. "docker run archivebox add --depth=1 https://example.com"
# e.g. "docker run archivebox help"
# "docker run archivebox add --depth=1 https://example.com"
# "docker run archivebox manage createsuperuser"
# "docker run archivebox server 0.0.0.0:8000"
exec gosu "$PUID" bash -c "archivebox $*"

View file

@ -18,6 +18,7 @@ SUPPORTED_PLATFORMS="linux/amd64,linux/arm64,linux/arm/v7"
TAG_NAME="${1:-$(git rev-parse --abbrev-ref HEAD)}"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
GIT_SHA=sha-"$(git rev-parse --short HEAD)"
SELECTED_PLATFORMS="${2:-$SUPPORTED_PLATFORMS}"
@ -34,12 +35,16 @@ docker buildx build --platform "$SELECTED_PLATFORMS" --push . \
-t archivebox/archivebox:$TAG_NAME \
-t archivebox/archivebox:$VERSION \
-t archivebox/archivebox:$SHORT_VERSION \
-t archivebox/archivebox:$GIT_SHA \
-t archivebox/archivebox:latest \
-t nikisweeting/archivebox \
-t nikisweeting/archivebox:$TAG_NAME \
-t nikisweeting/archivebox:$VERSION \
-t nikisweeting/archivebox:$SHORT_VERSION \
-t nikisweeting/archivebox:$GIT_SHA \
-t nikisweeting/archivebox:latest \
-t ghcr.io/archivebox/archivebox/archivebox:$TAG_NAME \
-t ghcr.io/archivebox/archivebox/archivebox:$VERSION \
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION
-t ghcr.io/archivebox/archivebox/archivebox:$SHORT_VERSION \
-t ghcr.io/archivebox/archivebox/archivebox:$GIT_SHA

975
pdm.lock

File diff suppressed because it is too large Load diff

View file

@ -3,10 +3,9 @@ name = "archivebox"
version = "0.7.1"
description = "Self-hosted internet archiving solution."
authors = [
{name = "Nick Sweeting", email = "setup.py@archivebox.io"},
{name = "Nick Sweeting", email = "pyproject.toml@archivebox.io"},
]
dependencies = [
# "setuptools>=68.2.2",
"croniter>=0.3.34",
"dateparser>=1.0.0",
"django-extensions>=3.0.3",
@ -16,8 +15,7 @@ dependencies = [
"python-crontab>=2.5.1",
"requests>=2.24.0",
"w3lib>=1.22.0",
# "youtube-dl>=2021.04.17",
"yt-dlp>=2021.4.11",
"yt-dlp>=2023.10.13",
# "playwright>=1.39.0; platform_machine != 'armv7l'",
]
requires-python = ">=3.9"
@ -41,6 +39,9 @@ classifiers = [
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
"Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
@ -56,34 +57,26 @@ classifiers = [
# pdm lock -G:all
# pdm install -G:all
[tool.pdm.dev-dependencies]
build = [
"setuptools",
dev = [
# build
"setuptools>=68.2.2",
"wheel",
"pdm",
# "bottle",
# "stdeb",
# "twine",
]
lint = [
"flake8",
"mypy",
"django-stubs",
]
test = [
"pytest",
]
debug = [
"django-debug-toolbar",
"djdt_flamegraph",
"ipdb",
]
doc = [
"homebrew-pypi-poet>=0.10.0",
# docs
"recommonmark",
"sphinx",
"sphinx-rtd-theme",
]
dev = [
"homebrew-pypi-poet>=0.10.0",
# debug
"django-debug-toolbar",
"djdt_flamegraph",
"ipdb",
# test
"pytest",
# lint
"flake8",
"mypy",
"django-stubs",
]
[tool.pdm.scripts]

View file

@ -1,35 +1,41 @@
# This file is @generated by PDM.
# Please do not edit it manually.
appnope==0.1.3
asgiref==3.7.2
asttokens==2.4.1
brotli==1.1.0
certifi==2023.7.22
brotli==1.1.0; implementation_name == "cpython"
brotlicffi==1.1.0.0; implementation_name != "cpython"
certifi==2023.11.17
cffi==1.16.0; implementation_name != "cpython"
charset-normalizer==3.3.2
colorama==0.4.6; sys_platform == "win32"
croniter==2.0.1
dateparser==1.1.8
dateparser==1.2.0
decorator==5.1.1
django==3.1.14
django-auth-ldap==4.1.0
django-extensions==3.1.5
exceptiongroup==1.2.0; python_version < "3.11"
executing==2.0.1
idna==3.4
ipython==8.17.2
idna==3.6
ipython==8.18.1
jedi==0.19.1
matplotlib-inline==0.1.6
mutagen==1.47.0
mypy-extensions==1.0.0
parso==0.8.3
pexpect==4.8.0
prompt-toolkit==3.0.40
ptyprocess==0.7.0
pexpect==4.9.0; sys_platform != "win32"
prompt-toolkit==3.0.43
ptyprocess==0.7.0; sys_platform != "win32"
pure-eval==0.2.2
pyasn1==0.5.0
pyasn1==0.5.1
pyasn1-modules==0.3.0
pycparser==2.21; implementation_name != "cpython"
pycryptodomex==3.19.0
pygments==2.16.1
pygments==2.17.2
python-crontab==3.0.0
python-dateutil==2.8.2
python-ldap==3.4.4
pytz==2023.3.post1
regex==2023.10.3
requests==2.31.0
@ -37,10 +43,12 @@ six==1.16.0
sonic-client==1.0.0
sqlparse==0.4.4
stack-data==0.6.3
traitlets==5.13.0
traitlets==5.14.0
typing-extensions==4.9.0; python_version < "3.11"
tzdata==2023.3; platform_system == "Windows"
tzlocal==5.2
urllib3==2.1.0
w3lib==2.1.2
wcwidth==0.2.10
wcwidth==0.2.12
websockets==12.0
yt-dlp==2023.10.13
yt-dlp==2023.11.16