mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
08cffb8742
Lack of `fi` resulted in a syntax error. Also, change `;` to `||` to ensure it builds successfully on architectures other than `linux/arm/v7`. ``` [2023-10-24T10:46:49.941Z] ------ > [dev_container_auto_added_stage_label 11/18] RUN mkdir -p "/home/archivebox/.config/chromium/Crash Reports/pending/" && chown -R archivebox "/home/archivebox/.config" ; if [[ "linux/amd64" == "linux/arm/v7" ]]; then $exit 0; else exit 1: 0.401 /bin/bash: -c: line 2: syntax error: unexpected end of file ------ WARNING: buildx: git was not found in the system. Current commit information was not captured by the build [2023-10-24T10:46:49.942Z] Dockerfile-with-features:135 -------------------- 134 | && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser 135 | >>> RUN mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \ 136 | >>> && chown -R $ARCHIVEBOX_USER "/home/${ARCHIVEBOX_USER}/.config" \ 137 | >>> ; if [[ "$TARGETPLATFORM" == "linux/arm/v7" ]]; then $exit 0; else exit 1 138 | # ignore failure for architectures where no playwright release is available yet -------------------- ERROR: failed to solve: process "/bin/bash -c mkdir -p \"/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/\" && chown -R $ARCHIVEBOX_USER \"/home/${ARCHIVEBOX_USER}/.config\" ; if [[ \"$TARGETPLATFORM\" == \"linux/arm/v7\" ]]; then $exit 0; else exit 1" did not complete successfully: exit code: 2 ```
194 lines
8.4 KiB
Docker
194 lines
8.4 KiB
Docker
# This is the Dockerfile for ArchiveBox. It bundles the following dependencies:
|
|
# python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, yt-dlp, single-file
|
|
# Usage:
|
|
# git submodule update --init --recursive
|
|
# git pull --recurse-submodules
|
|
# docker build . -t archivebox --no-cache
|
|
# docker run -v "$PWD/data":/data archivebox init
|
|
# docker run -v "$PWD/data":/data archivebox add 'https://example.com'
|
|
# docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
|
|
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
|
|
# Multi-arch build:
|
|
# docker buildx create --use
|
|
# docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
|
|
#
|
|
# Read more about [developing ArchiveBox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
|
|
|
|
|
|
# Base image: Debian 12 (bookworm) with the backports suite, for faster package updates.
# See: https://packages.debian.org/bookworm-backports/
FROM debian:bookworm-backports

# Image metadata, written as one label per instruction.
LABEL name="archivebox"
LABEL maintainer="Nick Sweeting <dockerfile@archivebox.io>"
LABEL description="All-in-one personal internet archiving container"
LABEL homepage="https://github.com/ArchiveBox/ArchiveBox"
LABEL documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
|
|
|
|
######### Base System Setup ####################################
|
|
|
|
# Global system-level config (these persist into the running container).

# Timezone and locale
ENV TZ=UTC \
    LANGUAGE=en_US:en \
    LC_ALL=C.UTF-8 \
    LANG=C.UTF-8

# Python stdio encoding and unbuffered output
ENV PYTHONIOENCODING=UTF-8 \
    PYTHONUNBUFFERED=1

# Non-interactive / quiet package tooling (apt, apt-key, npm)
ENV DEBIAN_FRONTEND=noninteractive \
    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
    npm_config_loglevel=error
|
|
|
|
# Application-level config: locations of the code, the data, the Python venvs and the
# node modules, plus the name of the unprivileged user that is created further down.
ENV CODE_DIR=/app \
    DATA_DIR=/data
ENV GLOBAL_VENV=/venv \
    APP_VENV=/app/.venv \
    NODE_MODULES=/app/node_modules
ENV ARCHIVEBOX_USER="archivebox"

# Append the venv and node bin directories to PATH. This has to be a separate ENV
# instruction placed after the ones above so that their values are expanded here.
ENV PATH="$PATH:$GLOBAL_VENV/bin:$APP_VENV/bin:$NODE_MODULES/.bin"
|
|
# Run every following RUN instruction with bash instead of the default /bin/sh
# (later steps rely on bash-only syntax such as [[ ... ]]).
SHELL ["/bin/bash", "-c"]

# Target-platform build args; declared here so the RUN steps below can read them.
ARG TARGETPLATFORM
ARG TARGETARCH
ARG TARGETVARIANT

# Log which platform is being built, followed by the kernel info of the build environment.
# The values are passed as printf arguments instead of being interpolated into the format
# string, so a stray '%' or backslash in a value cannot corrupt the output.
RUN printf '[i] Building for TARGETPLATFORM=%s, TARGETARCH=%s, TARGETVARIANT=%s \n' \
        "${TARGETPLATFORM}" "${TARGETARCH}" "${TARGETVARIANT}" \
    && printf 'uname -a : ' \
    && uname -a
|
|
|
|
|
|
# Create the non-privileged system user and group for archivebox and chrome
# (member of the audio and video groups, with a home directory), and make sure
# the apt keyrings directory exists.
RUN echo "[*] Setting up system environment..." \
    && groupadd --system "$ARCHIVEBOX_USER" \
    && useradd --system --create-home \
        --gid "$ARCHIVEBOX_USER" \
        --groups audio,video \
        "$ARCHIVEBOX_USER" \
    && mkdir -p /etc/apt/keyrings
|
|
|
|
# Install system apt dependencies (adding backports to access more recent apt updates).
# Package groups:
#   1. packaging dependencies:              apt-transport-https ca-certificates gnupg2 curl wget
#   2. docker and init system dependencies: zlib1g-dev dumb-init gosu cron unzip
#   3. frivolous CLI helpers to make debugging failed archiving easier (currently not installed):
#      nano iputils-ping dnsutils htop procps
RUN echo "[+] Installing system dependencies..." \
    && echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' \
        >> /etc/apt/sources.list.d/backports.list \
    && apt-get update -qq \
    && apt-get install -qq -y \
        apt-transport-https \
        ca-certificates \
        curl \
        gnupg2 \
        wget \
        cron \
        dumb-init \
        gosu \
        unzip \
        zlib1g-dev \
    && mkdir -p /etc/apt/keyrings \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
######### Language Environments ####################################
|
|
|
|
# Install Node environment from the NodeSource apt repository (node_21.x).
#  - pipefail is enabled for this step so that a failed key download fails the build
#    instead of being masked by the exit status of the gpg end of the pipe.
#  - the armored key is de-armored exactly once into the keyring referenced by signed-by.
#  - apt package lists are removed at the end so they do not stay in this layer
#    (the later apt steps run their own `apt-get update`).
RUN echo "[+] Installing Node environment..." \
    && set -o pipefail \
    && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \
        | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
    && echo 'deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_21.x nodistro main' \
        >> /etc/apt/sources.list.d/nodejs.list \
    && apt-get update -qq \
    && apt-get install -qq -y nodejs libatomic1 \
    && npm i -g npm \
    && node --version \
    && npm --version \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Install Python environment:
#  - system python3 plus LDAP/SASL/SSL development packages from bookworm-backports
#  - drop the EXTERNALLY-MANAGED marker so pip may install outside of a venv
#  - create the global venv (sharing the system site-packages) and upgrade the
#    packaging tools inside it
RUN echo "[+] Installing Python environment..." \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
        libldap2-dev \
        libsasl2-dev \
        libssl-dev \
        python-dev-is-python3 \
        python3 \
        python3-ldap \
        python3-msgpack \
        python3-pip \
        python3-setuptools \
        python3-venv \
        python3-wheel \
    && rm /usr/lib/python3*/EXTERNALLY-MANAGED \
    && python3 -m venv --system-site-packages --symlinks "$GLOBAL_VENV" \
    && "$GLOBAL_VENV/bin/pip" install --upgrade pip pdm setuptools wheel python-ldap \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
######### Extractor Dependencies ##################################
|
|
|
|
# Install the apt packages used by the extractors.
# Packages we have also needed in the past:
#   youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv
#   fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf
RUN echo "[+] Installing extractor APT dependencies..." \
    && apt-get update -qq \
    && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
        curl \
        ffmpeg \
        git \
        ripgrep \
        wget \
        yt-dlp \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Install chromium browser using playwright, symlink it to /usr/bin/chromium-browser and
# prepare the chromium config directory for the archivebox user.
# If anything in the grouped chain fails, the build only continues when targeting
# linux/arm/v7 (failure is ignored for architectures where no playwright release is
# available yet); on every other platform the step exits non-zero.
ENV PLAYWRIGHT_BROWSERS_PATH="/browsers"
RUN { \
        echo "[+] Installing extractor Chromium dependency..." \
        && apt-get update -qq \
        && "$GLOBAL_VENV/bin/pip" install playwright \
        && "$GLOBAL_VENV/bin/playwright" install --with-deps chromium \
        && CHROME_BINARY="$($GLOBAL_VENV/bin/python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \
        && ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
        && mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \
        && chown -R "$ARCHIVEBOX_USER" "/home/${ARCHIVEBOX_USER}/.config" \
        ; \
    } || [[ "$TARGETPLATFORM" == "linux/arm/v7" ]]
|
|
|
|
# Install Node dependencies.
# Only the npm manifests are copied at this point, so changes to the rest of the
# source tree do not invalidate the cached `npm ci` layer.
WORKDIR "$CODE_DIR"
COPY --chown=root:root --chmod=755 "package.json" "package-lock.json" "$CODE_DIR/"
RUN echo "[+] Installing extractor Node dependencies..." \
    && npm ci --no-audit --prefer-offline \
    && npm version
|
|
|
|
######### Build Dependencies ####################################
|
|
|
|
# # Building ArchiveBox from source with all pdm dev dependencies
|
|
# WORKDIR "$CODE_DIR"
|
|
# COPY --chown=root:root --chmod=755 "./pyproject.toml" "./pdm.lock" "$CODE_DIR/"
|
|
# RUN echo "[+] Installing project Python dependencies..." \
|
|
# && apt-get update -qq \
|
|
# && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
|
# build-essential libssl-dev libldap2-dev libsasl2-dev \
|
|
# && pdm use -f $GLOBAL_VENV \
|
|
# && pdm install --fail-fast --no-lock --group :all --no-self \
|
|
# && pdm build \
|
|
# && apt-get purge -y \
|
|
# build-essential libssl-dev libldap2-dev libsasl2-dev \
|
|
# # these are only needed to build CPython libs, we discard after build phase to shrink layer size
|
|
# && apt-get autoremove -y \
|
|
# && rm -rf /var/lib/apt/lists/*
|
|
|
|
# Install ArchiveBox Python package from source:
# copy the whole build context into the code dir, then do an editable install into the
# global venv with the "sonic" and "ldap" extras.
#  - the requirement spec is quoted as a whole so bash cannot treat "[sonic,ldap]" as a
#    glob character class and expand it against the filesystem.
#  - no `apt-get update` here: this step installs no apt packages, so refreshing the
#    package lists would only add them to the layer.
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
RUN echo "[*] Installing ArchiveBox package from /app..." \
    && "$GLOBAL_VENV/bin/pip" install -e "${CODE_DIR}[sonic,ldap]"
|
|
|
|
####################################################
|
|
|
|
# Setup ArchiveBox runtime config: work from the data dir and set the binaries
# and feature toggles used inside the container.
WORKDIR "$DATA_DIR"

ENV IN_DOCKER=True

# Downloaders
ENV WGET_BINARY="wget" \
    YOUTUBEDL_BINARY="yt-dlp"

# Chromium (symlinked to this path by the playwright install step)
ENV CHROME_SANDBOX=False \
    CHROME_BINARY="/usr/bin/chromium-browser"

# Node-based extractors, resolved from the project's node_modules/.bin
ENV USE_SINGLEFILE=True \
    SINGLEFILE_BINARY="$NODE_MODULES/.bin/single-file"
ENV USE_READABILITY=True \
    READABILITY_BINARY="$NODE_MODULES/.bin/readability-extractor"
ENV USE_MERCURY=True \
    MERCURY_BINARY="$NODE_MODULES/.bin/postlight-parser"
|
|
|
|
# Print version for nice docker finish summary, appending each tool's output to
# /version_info.txt.
# `npm version` is run from $CODE_DIR, the directory package.json was copied into
# ($NODE_DIR is not defined anywhere in this file, so `env --chdir` previously received
# an empty path).
# NOTE: each command is piped through tee and pipefail is not enabled, so the status of
# every pipeline is tee's; a failing version command does not fail the build.
# RUN archivebox version
RUN echo "[√] Finished Docker build succesfully. Saving build summary in: /version_info.txt" \
    && uname -a | tee -a /version_info.txt \
    && env --chdir="$CODE_DIR" npm version | tee -a /version_info.txt \
    && env --chdir="$CODE_DIR" pdm info | tee -a /version_info.txt \
    && "$CODE_DIR/bin/docker_entrypoint.sh" archivebox version 2>&1 | tee -a /version_info.txt
|
|
|
|
####################################################
|
|
|
|
# Open up the interfaces to the outside world:
# the data directory as a volume, and the port the default server command listens on.
VOLUME ["/data"]
EXPOSE 8000/tcp

# Optional:
# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
#     CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1

# dumb-init launches the entrypoint script, which receives CMD as its arguments;
# the default command starts the archivebox server on 0.0.0.0:8000.
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]
|