mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-29 07:30:23 +00:00
merge fixes
This commit is contained in:
parent
b4c3aa5097
commit
ac73fb5129
6 changed files with 40 additions and 41 deletions
11
Dockerfile
11
Dockerfile
|
@ -10,7 +10,7 @@
|
||||||
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
|
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
|
||||||
# Multi-arch build:
|
# Multi-arch build:
|
||||||
# docker buildx create --use
|
# docker buildx create --use
|
||||||
# docker buildx build . --platform=linux/amd64,linux/arm64,linux/arm/v7 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
|
# docker buildx build . --platform=linux/amd64,linux/arm64 --push -t archivebox/archivebox:latest -t archivebox/archivebox:dev
|
||||||
#
|
#
|
||||||
# Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
|
# Read more about [developing Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
|
||||||
|
|
||||||
|
@ -195,9 +195,11 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$T
|
||||||
&& export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
|
&& export CHROME_BINARY="$(python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')"; \
|
||||||
else \
|
else \
|
||||||
# fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
|
# fall back to installing Chromium via apt-get on platforms not supported by playwright (e.g. risc, ARMv7, etc.)
|
||||||
apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
# apt-get install -qq -y -t bookworm-backports --no-install-recommends \
|
||||||
chromium \
|
# chromium \
|
||||||
&& export CHROME_BINARY="$(which chromium)"; \
|
# && export CHROME_BINARY="$(which chromium)"; \
|
||||||
|
echo 'armv7 no longer supported in versions after v0.7.3' \
|
||||||
|
&& exit 1; \
|
||||||
fi \
|
fi \
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
&& rm -rf /var/lib/apt/lists/* \
|
||||||
&& ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
|
&& ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
|
||||||
|
@ -275,7 +277,6 @@ ENV IN_DOCKER=True \
|
||||||
GOOGLE_DEFAULT_CLIENT_SECRET=no \
|
GOOGLE_DEFAULT_CLIENT_SECRET=no \
|
||||||
ALLOWED_HOSTS=*
|
ALLOWED_HOSTS=*
|
||||||
## No need to set explicitly, these values will be autodetected by archivebox in docker:
|
## No need to set explicitly, these values will be autodetected by archivebox in docker:
|
||||||
# CHROME_SANDBOX=False \
|
|
||||||
# WGET_BINARY="wget" \
|
# WGET_BINARY="wget" \
|
||||||
# YOUTUBEDL_BINARY="yt-dlp" \
|
# YOUTUBEDL_BINARY="yt-dlp" \
|
||||||
# CHROME_BINARY="/usr/bin/chromium-browser" \
|
# CHROME_BINARY="/usr/bin/chromium-browser" \
|
||||||
|
|
|
@ -1076,7 +1076,7 @@ Because ArchiveBox is designed to ingest a large volume of URLs with multiple co
|
||||||
<li><strong>Don't store large collections on older filesystems like EXT3/FAT</strong> as they may not be able to handle more than 50k directory entries in the <code>data/archive/</code> folder.
|
<li><strong>Don't store large collections on older filesystems like EXT3/FAT</strong> as they may not be able to handle more than 50k directory entries in the <code>data/archive/</code> folder.
|
||||||
</li>
|
</li>
|
||||||
<li><strong>Try to keep the <code>data/index.sqlite3</code> file on local drive (not a network mount)</strong> or SSD for maximum performance, however the <code>data/archive/</code> folder can be on a network mount or slower HDD.</li>
|
<li><strong>Try to keep the <code>data/index.sqlite3</code> file on local drive (not a network mount)</strong> or SSD for maximum performance, however the <code>data/archive/</code> folder can be on a network mount or slower HDD.</li>
|
||||||
<li>If using Docker or NFS/SMB/FUSE for the `data/archive/` folder, you may need to set <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid"><code>PUID</code> & <code>PGID</code></a> and <a href="https://github.com/ArchiveBox/ArchiveBox/issues/1304">disable <code>root_squash</code></a> on your fileshare server.
|
<li>If using Docker or NFS/SMB/FUSE for the <code>data/archive/</code> folder, you may need to set <a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration#puid--pgid"><code>PUID</code> & <code>PGID</code></a> and <a href="https://github.com/ArchiveBox/ArchiveBox/issues/1304">disable <code>root_squash</code></a> on your fileshare server.
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|
||||||
|
|
|
@ -250,7 +250,7 @@ def load_main_index(out_dir: Path=OUTPUT_DIR, warn: bool=True) -> List[Link]:
|
||||||
"""parse and load existing index with any new links from import_path merged in"""
|
"""parse and load existing index with any new links from import_path merged in"""
|
||||||
from core.models import Snapshot
|
from core.models import Snapshot
|
||||||
try:
|
try:
|
||||||
return Snapshot.objects.all()
|
return Snapshot.objects.all().only('id')
|
||||||
|
|
||||||
except (KeyboardInterrupt, SystemExit):
|
except (KeyboardInterrupt, SystemExit):
|
||||||
raise SystemExit(0)
|
raise SystemExit(0)
|
||||||
|
|
|
@ -8,32 +8,26 @@
|
||||||
# Documentation:
|
# Documentation:
|
||||||
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
|
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
|
||||||
|
|
||||||
version: '3.9'
|
|
||||||
|
|
||||||
services:
|
services:
|
||||||
archivebox:
|
archivebox:
|
||||||
#image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
|
image: archivebox/archivebox
|
||||||
image: archivebox/archivebox:dev
|
|
||||||
command: server --quick-init 0.0.0.0:8000
|
|
||||||
ports:
|
ports:
|
||||||
- 8000:8000
|
- 8000:8000
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/data
|
- ./data:/data
|
||||||
# - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs
|
|
||||||
# - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox)
|
|
||||||
# build: . # uncomment this to build the image from source code at buildtime (for developers working on archivebox)
|
|
||||||
environment:
|
environment:
|
||||||
- ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name
|
- ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name
|
||||||
# - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
|
|
||||||
# - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
|
|
||||||
# - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
|
|
||||||
# - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo
|
# - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo
|
||||||
# - ADMIN_PASSWORD=SomeSecretPassword
|
# - ADMIN_PASSWORD=SomeSecretPassword
|
||||||
# - PUID=911 # set to your host user's UID & GID if you encounter permissions issues
|
# - PUID=911 # set to your host user's UID & GID if you encounter permissions issues
|
||||||
# - PGID=911
|
# - PGID=911
|
||||||
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search
|
# - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
|
||||||
# - SEARCH_BACKEND_HOST_NAME=sonic
|
# - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
|
||||||
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
# - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
|
||||||
|
- SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search
|
||||||
|
- SEARCH_BACKEND_HOST_NAME=sonic
|
||||||
|
- SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
||||||
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
|
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
|
||||||
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
|
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
|
||||||
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
|
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
|
||||||
|
@ -51,21 +45,25 @@ services:
|
||||||
|
|
||||||
######## Optional Addons: tweak examples below as needed for your specific use case ########
|
######## Optional Addons: tweak examples below as needed for your specific use case ########
|
||||||
|
|
||||||
### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg
|
### Runs the Sonic full-text search backend, config file is auto-downloaded into sonic.cfg:
|
||||||
# $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
|
|
||||||
# After starting, backfill any existing Snapshots into the full-text index:
|
# After starting, backfill any existing Snapshots into the full-text index:
|
||||||
# $ docker-compose run archivebox update --index-only
|
# $ docker-compose run archivebox update --index-only
|
||||||
|
|
||||||
# sonic:
|
sonic:
|
||||||
# image: valeriansaliou/sonic:latest
|
image: valeriansaliou/sonic
|
||||||
# expose:
|
build:
|
||||||
# - 1491
|
dockerfile_inline: |
|
||||||
# environment:
|
FROM quay.io/curl/curl:latest AS setup
|
||||||
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
RUN curl -fsSL 'https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/main/etc/sonic.cfg' > /tmp/sonic.cfg
|
||||||
# volumes:
|
FROM valeriansaliou/sonic:latest
|
||||||
# - ./sonic.cfg:/etc/sonic.cfg:ro
|
COPY --from=setup /tmp/sonic.cfg /etc/sonic.cfg
|
||||||
# - ./data/sonic:/var/lib/sonic/store
|
expose:
|
||||||
|
- 1491
|
||||||
|
environment:
|
||||||
|
- SEARCH_BACKEND_PASSWORD=SomeSecretPassword
|
||||||
|
volumes:
|
||||||
|
- ./etc/sonic.cfg:/etc/sonic.cfg
|
||||||
|
- ./data/sonic:/var/lib/sonic/store
|
||||||
|
|
||||||
### Example: To run pihole in order to block ad/tracker requests during archiving,
|
### Example: To run pihole in order to block ad/tracker requests during archiving,
|
||||||
# uncomment this block and set up pihole using its admin interface
|
# uncomment this block and set up pihole using its admin interface
|
||||||
|
|
|
@ -8,6 +8,6 @@
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@postlight/parser": "^2.2.3",
|
"@postlight/parser": "^2.2.3",
|
||||||
"readability-extractor": "github:ArchiveBox/readability-extractor",
|
"readability-extractor": "github:ArchiveBox/readability-extractor",
|
||||||
"single-file-cli": "^1.1.46"
|
"single-file-cli": "^1.1.54"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -15,15 +15,16 @@ dependencies = [
|
||||||
"dateparser>=1.0.0",
|
"dateparser>=1.0.0",
|
||||||
"django-extensions>=3.2.3",
|
"django-extensions>=3.2.3",
|
||||||
"django>=4.2.0,<5.0",
|
"django>=4.2.0,<5.0",
|
||||||
|
"setuptools>=69.0.3",
|
||||||
"feedparser>=6.0.11",
|
"feedparser>=6.0.11",
|
||||||
"ipython>5.0.0",
|
"ipython>5.0.0",
|
||||||
"mypy-extensions>=0.4.3",
|
"mypy-extensions>=0.4.3",
|
||||||
"python-crontab>=2.5.1",
|
"python-crontab>=2.5.1",
|
||||||
"requests>=2.24.0",
|
"requests>=2.24.0",
|
||||||
"w3lib>=1.22.0",
|
"w3lib>=1.22.0",
|
||||||
"yt-dlp>=2023.10.13",
|
"yt-dlp>=2024.3.10",
|
||||||
# don't add playwright because packages without sdists cause trouble on many build systems that refuse to install wheel-only packages
|
# don't add playwright because packages without sdists cause trouble on many build systems that refuse to install wheel-only packages
|
||||||
# "playwright>=1.39.0; platform_machine != 'armv7l'",
|
"playwright>=1.39.0; platform_machine != 'armv7l'",
|
||||||
]
|
]
|
||||||
|
|
||||||
classifiers = [
|
classifiers = [
|
||||||
|
@ -64,11 +65,11 @@ classifiers = [
|
||||||
sonic = [
|
sonic = [
|
||||||
# echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
|
# echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
|
||||||
# curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
|
# curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
|
||||||
|
# apt install sonic
|
||||||
"sonic-client>=0.0.5",
|
"sonic-client>=0.0.5",
|
||||||
]
|
]
|
||||||
ldap = [
|
ldap = [
|
||||||
# apt install libldap2-dev libsasl2-dev python3-ldap
|
# apt install libldap2-dev libsasl2-dev python3-ldap
|
||||||
"setuptools>=69.0.3",
|
|
||||||
"python-ldap>=3.4.3",
|
"python-ldap>=3.4.3",
|
||||||
"django-auth-ldap>=4.1.0",
|
"django-auth-ldap>=4.1.0",
|
||||||
]
|
]
|
||||||
|
@ -83,7 +84,6 @@ ldap = [
|
||||||
[tool.pdm.dev-dependencies]
|
[tool.pdm.dev-dependencies]
|
||||||
dev = [
|
dev = [
|
||||||
# building
|
# building
|
||||||
"setuptools>=69.0.3",
|
|
||||||
"wheel",
|
"wheel",
|
||||||
"pdm",
|
"pdm",
|
||||||
"homebrew-pypi-poet>=0.10.0",
|
"homebrew-pypi-poet>=0.10.0",
|
||||||
|
|
Loading…
Reference in a new issue