# ArchiveBox/Dockerfile

# This is the Dockerfile for ArchiveBox. It bundles the following dependencies:
#     python3, ArchiveBox, curl, wget, git, chromium, ffmpeg, ripgrep, nodejs,
#     youtube-dl, yt-dlp, single-file
# Usage:
#     docker build . -t archivebox --no-cache
#     docker run -v "$PWD/data":/data archivebox init
#     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
#     docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
#     docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
# Multi-arch build:
#     docker buildx create --use
#     docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev

# Base image: slim Python 3.10 on Debian bullseye.
FROM python:3.10-slim-bullseye

# Descriptive image metadata (name, maintainer contact, project links).
LABEL name="archivebox" \
      maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
      description="All-in-one personal internet archiving container" \
      homepage="https://github.com/ArchiveBox/ArchiveBox" \
      documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"

# System-level base config: timezone, locale/encoding, unbuffered Python
# output, and non-interactive apt / apt-key behaviour.
# NOTE(review): DEBIAN_FRONTEND and APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE are set
# with ENV, so they also persist into the running container's environment.
# If they are only needed at build time they could become ARGs -- confirm that
# nothing at runtime relies on them before changing.
ENV TZ=UTC \
    LANGUAGE=en_US:en \
    LC_ALL=C.UTF-8 \
    LANG=C.UTF-8 \
    PYTHONIOENCODING=UTF-8 \
    PYTHONUNBUFFERED=1 \
    DEBIAN_FRONTEND=noninteractive \
    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1

# Application-level base config: where the code, virtualenv, data volume and
# node modules live inside the image, plus the name of the service account.
ENV CODE_DIR=/app \
    VENV_PATH=/venv \
    DATA_DIR=/data \
    NODE_DIR=/node \
    ARCHIVEBOX_USER="archivebox"

# Add an unprivileged system group and user (with a home directory and
# membership in the audio and video groups) for archivebox and chrome.
RUN groupadd --system "${ARCHIVEBOX_USER}" \
    && useradd \
        --system \
        --create-home \
        --gid "${ARCHIVEBOX_USER}" \
        --groups audio,video \
        "${ARCHIVEBOX_USER}"

# Install system dependencies: apt HTTPS/key tooling, the init wrapper used by
# ENTRYPOINT (dumb-init), gosu, cron, and basic download/unpack utilities.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        cron \
        curl \
        dumb-init \
        gnupg2 \
        gosu \
        unzip \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install apt dependencies: the download/media tools, chromium, and a set of
# fonts. /usr/bin/chromium-browser is symlinked to /usr/bin/chromium because
# CHROME_BINARY is later set to the chromium-browser path.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        chromium \
        curl \
        ffmpeg \
        fontconfig \
        fonts-freefont-ttf \
        fonts-ipafont-gothic \
        fonts-kacst \
        fonts-noto \
        fonts-symbola \
        fonts-thai-tlwg \
        fonts-wqy-zenhei \
        git \
        ripgrep \
        wget \
        youtube-dl \
    && ln -s /usr/bin/chromium /usr/bin/chromium-browser \
    && rm -rf /var/lib/apt/lists/*

# Install Node environment from the NodeSource apt repository.
# The repository suite is "bullseye" to match the Debian release of the base
# image (python:3.10-slim-bullseye).
# curl uses -f so an HTTP error fails instead of piping an error page into
# apt-key, -S so the error is still printed, and -L to follow redirects.
RUN curl -fsSL https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
    && echo 'deb https://deb.nodesource.com/node_17.x bullseye main' >> /etc/apt/sources.list \
    && apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        nodejs \
    # && npm install -g npm \
    && rm -rf /var/lib/apt/lists/*

# Install Node dependencies.
# Only the npm manifests are copied before `npm ci`, so this layer is rebuilt
# only when package.json / package-lock.json change. COPY is used rather than
# ADD because these are plain local files (no archive extraction or URL fetch).
WORKDIR "$NODE_DIR"
# Make binaries installed under node_modules/.bin callable by name.
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
    npm_config_loglevel=error
COPY ./package.json ./package-lock.json ./
RUN npm ci
# Install Python dependencies: create the virtualenv, upgrade pip/setuptools,
# and copy in only the files needed to compute the requirements list.
WORKDIR "$CODE_DIR"
# NOTE(review): the venv's bin directory is appended AFTER the existing PATH,
# so `python`/`pip` presumably still resolve to the base image's interpreter
# rather than the one in $VENV_PATH -- confirm this ordering is intended.
ENV PATH="${PATH}:$VENV_PATH/bin"
RUN python -m venv --clear --symlinks "$VENV_PATH" \
    && pip install --no-cache-dir --upgrade --quiet pip setuptools \
    && mkdir -p "$CODE_DIR/archivebox"
# COPY rather than ADD: plain local files, no archive extraction needed.
COPY "./setup.py" "$CODE_DIR/"
COPY "./package.json" "$CODE_DIR/archivebox/"
# Build and install the Python requirements in a single layer:
#   1. install the compiler toolchain and Python headers,
#   2. write a placeholder README.md for setup.py to use,
#   3. extract install_requires + the "sonic" extra from setup.py into
#      /tmp/requirements.txt and pip-install them,
#   4. upgrade youtube-dl and yt-dlp,
#   5. purge the toolchain and apt lists again so they do not stay in the layer.
# pip runs with --no-cache-dir so its download cache is not kept in the image.
# NOTE(review): python-dev presumably refers to Python 2 headers on this base
# and may be unnecessary -- confirm before removing.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        build-essential python-dev python3-dev \
    && echo 'empty placeholder for setup.py to use' > "$CODE_DIR/archivebox/README.md" \
    && python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
    && pip install --no-cache-dir -r /tmp/requirements.txt \
    && pip install --no-cache-dir --upgrade youtube-dl yt-dlp \
    && apt-get purge -y build-essential python-dev python3-dev \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

# Install apt development dependencies (disabled)
# RUN apt-get update -qq \
#     && apt-get install -qq -y --no-install-recommends \
#         python3 python3-dev python3-pip python3-venv python3-all \
#         dh-python debhelper devscripts dput software-properties-common \
#         python3-distutils python3-setuptools python3-wheel python3-stdeb
# RUN python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.extras_require["dev"]))' > /tmp/dev_requirements.txt \
#     && pip install --quiet -r /tmp/dev_requirements.txt

# Install ArchiveBox Python package and its dependencies.
# The full build context is copied here so that source edits only invalidate
# layers from this point on. COPY is used rather than ADD because this is a
# plain local directory copy.
WORKDIR "$CODE_DIR"
COPY . "$CODE_DIR"
# Editable install: the package is linked to the source tree in $CODE_DIR.
RUN pip install --no-cache-dir -e .
# Setup ArchiveBox runtime config: run from the data directory, flag that we
# are inside Docker, and set the chrome, node extractor and youtube-dl binary
# paths and feature toggles.
WORKDIR "$DATA_DIR"
ENV IN_DOCKER=True \
    CHROME_SANDBOX=False \
    CHROME_BINARY="/usr/bin/chromium-browser" \
    USE_SINGLEFILE=True \
    SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
    USE_READABILITY=True \
    READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor" \
    USE_MERCURY=True \
    MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser" \
    YOUTUBEDL_BINARY="yt-dlp"

# Print version for nice docker finish summary.
# The command goes through the same entrypoint script the container uses at
# runtime, so a broken install or entrypoint fails the build here.
# RUN archivebox version
RUN /app/bin/docker_entrypoint.sh archivebox version

# Open up the interfaces to the outside world: the data directory is a volume
# and the web server port is documented.
VOLUME "$DATA_DIR"
EXPOSE 8000

# Optional:
# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
#     CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1

# dumb-init runs as the init process in front of the entrypoint script.
# NOTE(review): there is no USER directive, so the container starts as root;
# presumably docker_entrypoint.sh drops to $ARCHIVEBOX_USER (gosu is installed
# above) -- confirm against the entrypoint script.
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
# Default command: run the web server on all interfaces, port 8000.
CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]