fix armv7 build for Docker and bare metal

This commit is contained in:
Nick Sweeting 2023-10-30 23:25:51 -07:00
parent 62e077a5bc
commit 6b35c30e17
3 changed files with 174 additions and 86 deletions

View file

@ -24,117 +24,192 @@ LABEL name="archivebox" \
homepage="https://github.com/ArchiveBox/ArchiveBox" \
documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
######### Base System Setup ####################################
ARG TARGETPLATFORM
ARG TARGETARCH
ARG TARGETVARIANT
######### Environment Variables #################################
# Global system-level config
ENV TZ=UTC \
LANGUAGE=en_US:en \
LC_ALL=C.UTF-8 \
LANG=C.UTF-8 \
PYTHONIOENCODING=UTF-8 \
PYTHONUNBUFFERED=1 \
DEBIAN_FRONTEND=noninteractive \
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
PYTHONIOENCODING=UTF-8 \
PYTHONUNBUFFERED=1 \
npm_config_loglevel=error
# Application-level config
# Version config
ENV PYTHON_VERSION=3.11 \
NODE_VERSION=21
# User config
ENV ARCHIVEBOX_USER="archivebox" \
DEFAULT_PUID=911 \
DEFAULT_PGID=911
# Global paths
ENV CODE_DIR=/app \
DATA_DIR=/data \
GLOBAL_VENV=/venv \
APP_VENV=/app/.venv \
NODE_MODULES=/app/node_modules \
ARCHIVEBOX_USER="archivebox"
PLAYWRIGHT_BROWSERS_PATH=/browsers
# Application-level paths
ENV APP_VENV=/app/.venv \
NODE_MODULES=/app/node_modules
# Build shell config
ENV PATH="$PATH:$GLOBAL_VENV/bin:$APP_VENV/bin:$NODE_MODULES/.bin"
SHELL ["/bin/bash", "-c"]
ARG TARGETPLATFORM
ARG TARGETARCH
ARG TARGETVARIANT
RUN printf "[i] Building for TARGETPLATFORM=${TARGETPLATFORM}" \
&& printf ", TARGETARCH=${TARGETARCH}" \
&& printf ", TARGETVARIANT=${TARGETVARIANT} \n" \
&& printf "uname -a : " && uname -a
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
######### System Environment ####################################
# Detect ArchiveBox version number by reading package.json
COPY --chown=root:root --chmod=755 package.json "$CODE_DIR/"
RUN grep '"version": ' "${CODE_DIR}/package.json" | awk -F'"' '{print $4}' > /VERSION.txt
# Print debug info about build and save it to disk
RUN (echo "[i] Docker build for ArchiveBox $(cat /VERSION.txt) starting..." \
&& echo "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
&& echo "BUILD_START_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ} LANG=${LANG}" \
&& echo \
&& echo "GLOBAL_VENV=${GLOBAL_VENV} APP_VENV=${APP_VENV} NODE_MODULES=${NODE_MODULES}" \
&& echo "PYTHON=${PYTHON_VERSION} NODE=${NODE_VERSION} PATH=${PATH}" \
&& echo "CODE_DIR=${CODE_DIR} DATA_DIR=${DATA_DIR}" \
&& echo \
&& uname -a \
&& cat /etc/os-release | head -n7 \
&& which bash && bash --version | head -n1 \
&& which dpkg && dpkg --version | head -n1 \
&& echo -e '\n\n' && env && echo -e '\n\n' \
) | tee -a /VERSION.txt
# Create non-privileged user for archivebox and chrome
RUN echo "[*] Setting up system environment..." \
RUN echo "[*] Setting up $ARCHIVEBOX_USER user ${DEFAULT_PUID}..." \
&& groupadd --system $ARCHIVEBOX_USER \
&& useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER \
&& mkdir -p /etc/apt/keyrings
&& usermod -u "$DEFAULT_PUID" "$ARCHIVEBOX_USER" \
&& groupmod -g "$DEFAULT_PGID" "$ARCHIVEBOX_USER" \
&& echo -e "\nARCHIVEBOX_USER=$ARCHIVEBOX_USER PUID=$(id -u $ARCHIVEBOX_USER) PGID=$(id -g $ARCHIVEBOX_USER)\n\n" \
| tee -a /VERSION.txt
# DEFAULT_PUID and DEFAULT_PID are overriden by PUID and PGID in /bin/docker_entrypoint.sh at runtime
# https://docs.linuxserver.io/general/understanding-puid-and-pgid
# Install system apt dependencies (adding backports to access more recent apt updates)
RUN echo "[+] Installing system dependencies..." \
&& echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
&& apt-get update -qq \
&& apt-get install -qq -y \
apt-transport-https ca-certificates gnupg2 curl wget \
zlib1g-dev dumb-init gosu cron unzip \
# nano iputils-ping dnsutils htop procps \
# 1. packaging dependencies
# 2. docker and init system dependencies
# 3. frivolous CLI helpers to make debugging failed archiving easier
RUN echo "[+] Installing system dependencies for $TARGETPLATFORM..." \
# && echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
&& mkdir -p /etc/apt/keyrings \
&& apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
# 1. packaging dependencies
apt-transport-https ca-certificates gnupg2 curl wget \
# 2. docker and init system dependencies
zlib1g-dev dumb-init gosu cron unzip grep \
# 3. frivolous CLI helpers to make debugging failed archiving easier
# nano iputils-ping dnsutils htop procps jq yq
&& rm -rf /var/lib/apt/lists/*
######### Language Environments ####################################
# Install Node environment
RUN echo "[+] Installing Node environment..." \
&& echo 'deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_21.x nodistro main' >> /etc/apt/sources.list.d/nodejs.list \
RUN echo "[+] Installing Node $NODE_VERSION environment..." \
&& echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_${NODE_VERSION}.x nodistro main" >> /etc/apt/sources.list.d/nodejs.list \
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& apt-get update -qq \
&& apt-get install -qq -y nodejs libatomic1 \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
nodejs libatomic1 \
&& rm -rf /var/lib/apt/lists/* \
# Update NPM to latest version
&& npm i -g npm \
&& node --version \
&& npm --version
# Save version info
&& ( \
which node && node --version \
&& which npm && npm --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
# Install Python environment
RUN echo "[+] Installing Python environment..." \
RUN echo "[+] Installing Python $PYTHON_VERSION environment..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
python3 python3-pip python3-venv python3-setuptools python3-wheel python-dev-is-python3 \
python3-ldap libldap2-dev libsasl2-dev libssl-dev python3-msgpack \
&& rm -rf /var/lib/apt/lists/* \
# tell PDM to allow using global system python site packages
&& rm /usr/lib/python3*/EXTERNALLY-MANAGED \
# create global virtual environment GLOBAL_VENV to use (better than using pip install --global)
&& python3 -m venv --system-site-packages --symlinks $GLOBAL_VENV \
# install global dependencies / python build dependencies in GLOBAL_VENV
&& $GLOBAL_VENV/bin/pip install --upgrade pip pdm setuptools wheel python-ldap \
&& rm -rf /var/lib/apt/lists/*
# Save version info
&& ( \
which python3 && python3 --version | grep " $PYTHON_VERSION" \
&& which pip3 && pip3 --version \
&& which pdm && pdm --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
######### Extractor Dependencies ##################################
# Install apt dependencies
RUN echo "[+] Installing extractor APT dependencies..." \
RUN echo "[+] Installing APT extractor dependencies..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
curl wget git yt-dlp ffmpeg ripgrep \
# Packages we have also needed in the past:
# youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
# fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& rm -rf /var/lib/apt/lists/*
&& rm -rf /var/lib/apt/lists/* \
# Save version info
&& ( \
which curl && curl --version | head -n1 \
&& which wget && wget --version | head -n1 \
&& which yt-dlp && yt-dlp --version | head -n1 \
&& which git && git --version | head -n1 \
&& which rg && rg --version | head -n1 \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
# Install chromium browser using playwright
ENV PLAYWRIGHT_BROWSERS_PATH="/browsers"
RUN echo "[+] Installing extractor Chromium dependency..." \
RUN echo "[+] Installing Browser binary dependencies for $TARGETPLATFORM..." \
&& apt-get update -qq \
&& $GLOBAL_VENV/bin/pip install playwright \
&& if [[ "$TARGETPLATFORM" == "linux/amd64" || "$TARGETPLATFORM" == "linux/arm64" ]]; then \
# install Chromium using playwright
$GLOBAL_VENV/bin/pip install playwright \
&& $GLOBAL_VENV/bin/playwright install --with-deps chromium \
&& CHROME_BINARY="$($GLOBAL_VENV/bin/python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \
&& export CHROME_BINARY="$($GLOBAL_VENV/bin/python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
else \
# install Chromium on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
apt-get install -qq -y -t bookworm-backports --no-install-recommends \
chromium fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& export CHROME_BINARY="$(which chromium)"; \
fi \
&& rm -rf /var/lib/apt/lists/* \
&& ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
&& mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \
&& chown -R $ARCHIVEBOX_USER "/home/${ARCHIVEBOX_USER}/.config" \
|| if [[ "$TARGETPLATFORM" == "linux/arm/v7" ]]; then exit 0; else exit 1; fi
# ignore failure for architectures where no playwright release is available yet
# Save version info
&& ( \
which chromium-browser && /usr/bin/chromium-browser --version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
# Install Node dependencies
WORKDIR "$CODE_DIR"
COPY --chown=root:root --chmod=755 "package.json" "package-lock.json" "$CODE_DIR/"
RUN echo "[+] Installing extractor Node dependencies..." \
RUN echo "[+] Installing NPM extractor dependencies..." \
&& npm ci --prefer-offline --no-audit \
&& npm version
&& ( \
which node && node --version \
&& which npm && npm version \
&& echo -e '\n\n' \
) | tee -a /VERSION.txt
######### Build Dependencies ####################################
# # Building ArchiveBox from source with all pdm dev dependencies
# # Install ArchiveBox development dependencies
# WORKDIR "$CODE_DIR"
# COPY --chown=root:root --chmod=755 "./pyproject.toml" "./pdm.lock" "$CODE_DIR/"
# RUN echo "[+] Installing project Python dependencies..." \
@ -152,38 +227,48 @@ RUN echo "[+] Installing extractor Node dependencies..." \
# Install ArchiveBox Python package from source
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
RUN echo "[*] Installing ArchiveBox package from /app..." \
RUN echo "[*] Installing PIP ArchiveBox package from $CODE_DIR..." \
&& apt-get update -qq \
&& $GLOBAL_VENV/bin/pip install -e "$CODE_DIR"[sonic,ldap]
# install C compiler to build deps on platforms that dont have 32-bit wheels available on pypi
&& [[ "$TARGETPLATFORM" == "linux/arm/v7" ]] \
&& apt-get install -qq -y --no-install-recommends build-essential python3-regex \
# INSTALL ARCHIVEBOX python package globally from CODE_DIR, with all optional dependencies
&& $GLOBAL_VENV/bin/pip3 install -e "$CODE_DIR"[sonic,ldap] \
# save docker image size and always remove compilers / build tools after building is complete
&& apt-get purge -y build-essential \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
####################################################
# Setup ArchiveBox runtime config
WORKDIR "$DATA_DIR"
ENV IN_DOCKER=True \
WGET_BINARY="wget" \
YOUTUBEDL_BINARY="yt-dlp" \
CHROME_SANDBOX=False \
CHROME_BINARY="/usr/bin/chromium-browser" \
USE_SINGLEFILE=True \
SINGLEFILE_BINARY="$NODE_MODULES/.bin/single-file" \
USE_READABILITY=True \
READABILITY_BINARY="$NODE_MODULES/.bin/readability-extractor" \
USE_MERCURY=True \
MERCURY_BINARY="$NODE_MODULES/.bin/postlight-parser"
ENV IN_DOCKER=True
## No need to set explicitly, these values will be autodetected by archivebox in docker:
# CHROME_SANDBOX=False \
# WGET_BINARY="wget" \
# YOUTUBEDL_BINARY="yt-dlp" \
# CHROME_BINARY="/usr/bin/chromium-browser" \
# USE_SINGLEFILE=True \
# SINGLEFILE_BINARY="$NODE_MODULES/.bin/single-file" \
# USE_READABILITY=True \
# READABILITY_BINARY="$NODE_MODULES/.bin/readability-extractor" \
# USE_MERCURY=True \
# MERCURY_BINARY="$NODE_MODULES/.bin/postlight-parser"
# Print version for nice docker finish summary
# RUN archivebox version
RUN echo "[√] Finished Docker build succesfully. Saving build summary in: /version_info.txt" \
&& uname -a | tee -a /version_info.txt \
&& env --chdir="$NODE_DIR" npm version | tee -a /version_info.txt \
&& env --chdir="$CODE_DIR" pdm info | tee -a /version_info.txt \
&& "$CODE_DIR/bin/docker_entrypoint.sh" archivebox version 2>&1 | tee -a /version_info.txt
RUN (echo -e "\n\n[√] Finished Docker build succesfully. Saving build summary in: /VERSION.txt" \
&& echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) ($(uname -s) ${TARGETARCH} ${TARGETVARIANT})" \
&& echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ}\n\n" \
&& "$CODE_DIR/bin/docker_entrypoint.sh" \
archivebox version 2>&1 \
) | tee -a /VERSION.txt
####################################################
# Open up the interfaces to the outside world
VOLUME "/data"
WORKDIR "$DATA_DIR"
VOLUME "$DATA_DIR"
EXPOSE 8000
# Optional:

View file

@ -3,18 +3,18 @@
DATA_DIR="${DATA_DIR:-/data}"
ARCHIVEBOX_USER="${ARCHIVEBOX_USER:-archivebox}"
export PUID=${PUID:-911}
export PGID=${PGID:-911}
# Set the archivebox user UID & GID
if [[ -n "$PUID" && "$PUID" != 0 ]]; then
usermod -u "$PUID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
fi
if [[ -n "$PGID" && "$PGID" != 0 ]]; then
groupmod -g "$PGID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
fi
usermod -o -u "$PUID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
groupmod -o -g "$PGID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
export PUID="$(id -u archivebox)"
export PGID="$(id -g archivebox)"
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR"
# Check the permissions of the data dir (or create if it doesn't exist)
if [[ -d "$DATA_DIR/archive" ]]; then
if touch "$DATA_DIR/archive/.permissions_test_safe_to_delete" 2>/dev/null; then
@ -22,9 +22,11 @@ if [[ -d "$DATA_DIR/archive" ]]; then
rm "$DATA_DIR/archive/.permissions_test_safe_to_delete"
# echo "[√] Permissions are correct"
else
echo "[X] Error: ArchiveBox (uid=$PUID) is not able to write to your ./data dir. Fix the permissions and retry:" >&2
echo " \$ chown -R $PUID:$PGID data" >&2
echo " You may need to pass PUID & PGID to the Docker container: https://docs.linuxserver.io/general/understanding-puid-and-pgid" >&2
echo -e "\n[X] Error: archivebox user (PUID=$PUID) is not able to write to your ./data dir." >&2
echo -e " Change ./data to be owned by PUID=$PUID PGID=$PGID on the host and retry:"
echo -e " \$ chown -R $PUID:$PGID ./data\n" >&2
echo -e " Configure the PUID & PGID environment variables to change the desired owner:" >&2
echo -e " https://docs.linuxserver.io/general/understanding-puid-and-pgid\n" >&2
exit 1
fi
else
@ -34,19 +36,19 @@ fi
# force set the ownership of the data dir contents to the archivebox user and group
# this is needed because Docker Desktop often does not map user permissions from the host properly
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR" "$DATA_DIR"/*
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR"/*
# Drop permissions to run commands as the archivebox user
if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "archivebox" ]]; then
# arg 1 is a binary, execute it verbatim
# e.g. "archivebox init"
# "/bin/bash"
# "echo"
if [[ "$1" == /* || "$1" == "bash" || "$1" == "sh" || "$1" == "echo" || "$1" == "cat" || "$1" == "archivebox" ]]; then
# handle "docker run archivebox /some/non-archivebox/command" by executing args as direct bash command
# e.g. "docker run archivebox /venv/bin/archivebox-alt init"
# "docker run archivebox /bin/bash -c '...'"
# "docker run archivebox echo test"
exec gosu "$ARCHIVEBOX_USER" bash -c "$*"
else
# no command given, assume args were meant to be passed to archivebox cmd
# e.g. "add https://example.com"
# "manage createsupseruser"
# "server 0.0.0.0:8000"
# handle "docker run archivebox add ..." by running args as archivebox $subcommand
# e.g. "docker run archivebox add https://example.com"
# "docker run archivebox manage createsupseruser"
# "docker run archivebox server 0.0.0.0:8000"
exec gosu "$ARCHIVEBOX_USER" bash -c "archivebox $*"
fi

View file

@ -63,6 +63,7 @@ build = [
"stdeb",
"twine",
"wheel",
"regex=2021.9.30; platform_machine == 'armv7l'",
]
lint = [
"flake8",