Merge branch 'dev' into fts5-search

This commit is contained in:
Nick Sweeting 2023-10-20 04:23:50 -07:00 committed by GitHub
commit 53355be46a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 3071 additions and 1615 deletions

View file

@ -5,16 +5,21 @@ __pycache__/
.mypy_cache/
.pytest_cache/
.github/
.git/
.pdm-build/
.pdm-python/
.eggs/
venv/
.venv/
.docker-venv/
node_modules/
build/
dist/
pip_dist/
!pip_dist/archivebox.egg-info/requires.txt
brew_dist/
deb_dist/
pip_dist/
assets/
data/

View file

@ -7,7 +7,7 @@ on:
jobs:
build:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v2
@ -18,7 +18,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v1
with:
python-version: 3.9
python-version: 3.11
architecture: x64
- name: Build Python Package

2
.gitignore vendored
View file

@ -13,6 +13,8 @@ venv/
node_modules/
# Packaging artifacts
.pdm-python
.pdm-build
archivebox.egg-info
archivebox-*.tar.gz
build/

View file

@ -16,15 +16,17 @@
# Archivebox](https://github.com/ArchiveBox/ArchiveBox#archivebox-development).
FROM python:3.11-slim-bullseye
FROM debian:bookworm-backports
LABEL name="archivebox" \
maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
maintainer="Nick Sweeting <dockerfile@archivebox.io>" \
description="All-in-one personal internet archiving container" \
homepage="https://github.com/ArchiveBox/ArchiveBox" \
documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
# System-level base config
######### Base System Setup ####################################
# Global system-level config
ENV TZ=UTC \
LANGUAGE=en_US:en \
LC_ALL=C.UTF-8 \
@ -32,103 +34,146 @@ ENV TZ=UTC \
PYTHONIOENCODING=UTF-8 \
PYTHONUNBUFFERED=1 \
DEBIAN_FRONTEND=noninteractive \
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
npm_config_loglevel=error
# Application-level base config
# Application-level config
ENV CODE_DIR=/app \
VENV_PATH=/venv \
DATA_DIR=/data \
NODE_DIR=/node \
GLOBAL_VENV=/venv \
APP_VENV=/app/.venv \
NODE_MODULES=/app/node_modules \
ARCHIVEBOX_USER="archivebox"
ENV PATH="$PATH:$GLOBAL_VENV/bin:$APP_VENV/bin:$NODE_MODULES/.bin"
# Create non-privileged user for archivebox and chrome
RUN groupadd --system $ARCHIVEBOX_USER \
&& useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER
RUN echo "[*] Setting up system environment..." \
&& groupadd --system $ARCHIVEBOX_USER \
&& useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER \
&& mkdir -p /etc/apt/keyrings
# Install system dependencies
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
apt-transport-https ca-certificates gnupg2 zlib1g-dev \
dumb-init gosu cron unzip curl \
# Install system apt dependencies (adding backports to access more recent apt updates)
RUN echo "[+] Installing system dependencies..." \
&& echo 'deb https://deb.debian.org/debian bullseye-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
&& apt-get update -qq \
&& apt-get install -qq -y \
apt-transport-https ca-certificates gnupg2 curl wget \
zlib1g-dev dumb-init gosu cron unzip \
nano iputils-ping dnsutils htop procps \
# 1. packaging dependencies
# 2. docker and init system dependencies
# 3. frivolous CLI helpers to make debugging failed archiving easier
&& mkdir -p /etc/apt/keyrings \
&& rm -rf /var/lib/apt/lists/*
# Install apt dependencies
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
wget curl chromium git ffmpeg youtube-dl ripgrep \
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& ln -s /usr/bin/chromium /usr/bin/chromium-browser \
&& rm -rf /var/lib/apt/lists/*
######### Language Environments ####################################
# Install Node environment
RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
&& echo 'deb https://deb.nodesource.com/node_18.x buster main' >> /etc/apt/sources.list \
RUN echo "[+] Installing Node environment..." \
&& echo 'deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main' >> /etc/apt/sources.list.d/nodejs.list \
&& curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
&& apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
nodejs \
# && npm install -g npm \
&& apt-get install -qq -y nodejs \
&& npm i -g npm \
&& node --version \
&& npm --version
# Install Python environment
RUN echo "[+] Installing Python environment..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
python3 python3-pip python3-venv python3-setuptools python3-wheel python-dev-is-python3 \
python3-ldap libldap2-dev libsasl2-dev libssl-dev \
&& rm /usr/lib/python3*/EXTERNALLY-MANAGED \
&& python3 -m venv --system-site-packages --symlinks $GLOBAL_VENV \
&& $GLOBAL_VENV/bin/pip install --upgrade pip pdm setuptools wheel python-ldap \
&& rm -rf /var/lib/apt/lists/*
######### Extractor Dependencies ##################################
# Install apt dependencies
RUN echo "[+] Installing extractor APT dependencies..." \
&& apt-get update -qq \
&& apt-get install -qq -y -t bookworm-backports --no-install-recommends \
curl wget git yt-dlp ffmpeg ripgrep \
# Packages we have also needed in the past:
# youtube-dl wget2 aria2 python3-pyxattr rtmpdump libfribidi-bin mpv \
# fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
&& rm -rf /var/lib/apt/lists/*
# Install chromium browser using playwright
ENV PLAYWRIGHT_BROWSERS_PATH="/browsers"
RUN echo "[+] Installing extractor Chromium dependency..." \
&& apt-get update -qq \
&& $GLOBAL_VENV/bin/pip install playwright \
&& $GLOBAL_VENV/bin/playwright install --with-deps chromium \
&& CHROME_BINARY="$($GLOBAL_VENV/bin/python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \
&& ln -s "$CHROME_BINARY" /usr/bin/chromium-browser \
&& mkdir -p "/home/${ARCHIVEBOX_USER}/.config/chromium/Crash Reports/pending/" \
&& chown -R $ARCHIVEBOX_USER "/home/${ARCHIVEBOX_USER}/.config"
# Install Node dependencies
WORKDIR "$NODE_DIR"
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
npm_config_loglevel=error
ADD ./package.json ./package.json
ADD ./package-lock.json ./package-lock.json
RUN npm ci
# Install Python dependencies
WORKDIR "$CODE_DIR"
ENV PATH="${PATH}:$VENV_PATH/bin"
RUN python -m venv --clear --symlinks "$VENV_PATH" \
&& pip install --upgrade --quiet pip setuptools \
&& mkdir -p "$CODE_DIR/archivebox"
ADD "./setup.py" "$CODE_DIR/"
ADD "./package.json" "$CODE_DIR/archivebox/"
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
build-essential python-dev python3-dev libldap2-dev libsasl2-dev \
&& echo 'empty placeholder for setup.py to use' > "$CODE_DIR/archivebox/README.md" \
&& python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
&& pip install -r /tmp/requirements.txt \
&& pip install --upgrade youtube-dl yt-dlp \
&& apt-get purge -y build-essential python-dev python3-dev libldap2-dev libsasl2-dev \
&& apt-get autoremove -y \
&& rm -rf /var/lib/apt/lists/*
COPY --chown=root:root --chmod=755 "package.json" "package-lock.json" "$CODE_DIR/"
RUN echo "[+] Installing extractor Node dependencies..." \
&& npm ci --prefer-offline --no-audit \
&& npm version
# Install apt development dependencies
# RUN apt-get install -qq \
# && apt-get install -qq -y --no-install-recommends \
# python3 python3-dev python3-pip python3-venv python3-all \
# dh-python debhelper devscripts dput software-properties-common \
# python3-distutils python3-setuptools python3-wheel python3-stdeb
# RUN python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.extras_require["dev"]))' > /tmp/dev_requirements.txt \
# && pip install --quiet -r /tmp/dev_requirements.txt
######### Build Dependencies ####################################
# Install ArchiveBox Python package and its dependencies
WORKDIR "$CODE_DIR"
ADD . "$CODE_DIR"
RUN chown -R root:root . && chmod a+rX -R . && pip install -e .
# # Installing Python dependencies to build from source
# WORKDIR "$CODE_DIR"
# COPY --chown=root:root --chmod=755 "./pyproject.toml" "./pdm.lock" "$CODE_DIR/"
# RUN echo "[+] Installing project Python dependencies..." \
# && apt-get update -qq \
# && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
# build-essential libssl-dev libldap2-dev libsasl2-dev \
# && pdm use -f $GLOBAL_VENV \
# && pdm install --fail-fast --no-lock --group :all --no-self \
# && pdm build \
# && apt-get purge -y \
# build-essential libssl-dev libldap2-dev libsasl2-dev \
# # these are only needed to build CPython libs, we discard after build phase to shrink layer size
# && apt-get autoremove -y \
# && rm -rf /var/lib/apt/lists/*
# Install ArchiveBox Python package from source
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
RUN echo "[*] Installing ArchiveBox package from /app..." \
&& apt-get update -qq \
&& $GLOBAL_VENV/bin/pip install -e "$CODE_DIR"[sonic,ldap]
####################################################
# Setup ArchiveBox runtime config
WORKDIR "$DATA_DIR"
ENV IN_DOCKER=True \
WGET_BINARY="wget" \
YOUTUBEDL_BINARY="yt-dlp" \
CHROME_SANDBOX=False \
CHROME_BINARY="/usr/bin/chromium-browser" \
USE_SINGLEFILE=True \
SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
SINGLEFILE_BINARY="$NODE_MODULES/.bin/single-file" \
USE_READABILITY=True \
READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor" \
READABILITY_BINARY="$NODE_MODULES/.bin/readability-extractor" \
USE_MERCURY=True \
MERCURY_BINARY="$NODE_DIR/node_modules/.bin/mercury-parser" \
YOUTUBEDL_BINARY="yt-dlp"
MERCURY_BINARY="$NODE_MODULES/.bin/postlight-parser"
# Print version for nice docker finish summary
# RUN archivebox version
RUN /app/bin/docker_entrypoint.sh archivebox version
RUN echo "[√] Finished Docker build succesfully. Saving build summary in: /version_info.txt" \
&& uname -a | tee -a /version_info.txt \
&& env --chdir="$NODE_DIR" npm version | tee -a /version_info.txt \
&& env --chdir="$CODE_DIR" pdm info | tee -a /version_info.txt \
&& "$CODE_DIR/bin/docker_entrypoint.sh" archivebox version 2>&1 | tee -a /version_info.txt
####################################################
# Open up the interfaces to the outside world
VOLUME "$DATA_DIR"
VOLUME "/data"
EXPOSE 8000
# Optional:

View file

@ -10,7 +10,7 @@
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community">Community</a> |
<a href="https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap">Roadmap</a>
<pre lang="bash"><code style="white-space: pre-line">"Your own personal internet archive" (网站存档 / 爬虫)
<pre lang="bash" align="center"><code style="white-space: pre-line; text-align: center" align="center">"Your own personal internet archive" (网站存档 / 爬虫)
curl -sSL 'https://get.archivebox.io' | sh
</code></pre>
@ -588,7 +588,8 @@ Each snapshot subfolder `./archive/<timestamp>/` includes a static `index.json`
You can export the main index to browse it statically without needing to run a server.
*Note about large exports: These exports are not paginated, exporting many URLs or the entire archive at once may be slow. Use the filtering CLI flags on the `archivebox list` command to export specific Snapshots or ranges.*
> **Note**
> These exports are not paginated, exporting many URLs or the entire archive at once may be slow. Use the filtering CLI flags on the `archivebox list` command to export specific Snapshots or ranges.
```bash
# archivebox list --help
@ -615,7 +616,7 @@ The paths in the static exports are relative, make sure to keep them next to you
### Archiving Private Content
<a id="archiving-private-urls"/>
<a id="archiving-private-urls"></a>
If you're importing pages with private content or URLs containing secret tokens you don't want public (e.g Google Docs, paywalled content, unlisted videos, etc.), **you may want to disable some of the extractor methods to avoid leaking that content to 3rd party APIs or the public**.
@ -985,6 +986,7 @@ archivebox init --setup
<details><summary><i>Click to expand...</i></summary>
Make sure to run this whenever you change things in `models.py`.
```bash
cd archivebox/
./manage.py makemigrations
@ -993,6 +995,7 @@ cd path/to/test/data/
archivebox shell
archivebox manage dbshell
```
(uses `pytest -s`)
https://stackoverflow.com/questions/1074212/how-can-i-see-the-raw-sql-queries-django-is-running
@ -1000,7 +1003,9 @@ https://stackoverflow.com/questions/1074212/how-can-i-see-the-raw-sql-queries-dj
#### Contributing a new extractor
<details><summary><i>Click to expand...</i></summary><br/><br/>
<details><summary><i>Click to expand...</i></summary>
<br/><br/>
ArchiveBox [`extractors`](https://github.com/ArchiveBox/ArchiveBox/blob/dev/archivebox/extractors/media.py) are external binaries or Python/Node scripts that ArchiveBox runs to archive content on a page.

34
SECURITY.md Normal file
View file

@ -0,0 +1,34 @@
# Security Policy
---
## Security Information
Please see this wiki page for important notices about ArchiveBox security, publishing your archives securely, and the dangers of executing archived JS:
https://github.com/ArchiveBox/ArchiveBox/wiki/Security-Overview
Also see this section of the README about important caveats when running ArchiveBox:
https://github.com/ArchiveBox/ArchiveBox?tab=readme-ov-file#caveats
You can also read these pages for more information about ArchiveBox's internals, development environment, DB schema, and more:
- https://github.com/ArchiveBox/ArchiveBox#archive-layout
- https://github.com/ArchiveBox/ArchiveBox#archivebox-development
- https://github.com/ArchiveBox/ArchiveBox/wiki/Upgrading-or-Merging-Archives
- https://github.com/ArchiveBox/ArchiveBox/wiki/Troubleshooting
---
## Reporting a Vulnerability
We use Github's built-in [Private Reporting](https://docs.github.com/en/code-security/security-advisories/guidance-on-reporting-and-writing-information-about-vulnerabilities/privately-reporting-a-security-vulnerability) feature to accept vulnerability reports.
1. Go to the Security tab on our Github repo: https://github.com/ArchiveBox/ArchiveBox/security
2. Click the ["Report a Vulnerability"](https://github.com/ArchiveBox/ArchiveBox/security/advisories/new) button
3. Fill out the form to submit the details of the report and it will be securely sent to the maintainers
You can also contact the maintainers via our public [Zulip Chat Server zulip.archivebox.io](https://zulip.archivebox.io) or [Twitter DMs @ArchiveBoxApp](https://twitter.com/ArchiveBoxApp).

View file

@ -91,6 +91,8 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'OUTPUT_PERMISSIONS': {'type': str, 'default': '644'},
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages
'ADMIN_USERNAME': {'type': str, 'default': None},
'ADMIN_PASSWORD': {'type': str, 'default': None},
'URL_WHITELIST': {'type': str, 'default': None},
'ENFORCE_ATOMIC_WRITES': {'type': bool, 'default': True},
'TAG_SEPARATOR_PATTERN': {'type': str, 'default': r'[,]'},
@ -234,12 +236,11 @@ CONFIG_SCHEMA: Dict[str, ConfigDefaultDict] = {
'CURL_BINARY': {'type': str, 'default': 'curl'},
'GIT_BINARY': {'type': str, 'default': 'git'},
'WGET_BINARY': {'type': str, 'default': 'wget'},
'WGET_BINARY': {'type': str, 'default': 'wget'}, # also can accept wget2
'SINGLEFILE_BINARY': {'type': str, 'default': lambda c: bin_path('single-file')},
'READABILITY_BINARY': {'type': str, 'default': lambda c: bin_path('readability-extractor')},
'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('mercury-parser')},
#'YOUTUBEDL_BINARY': {'type': str, 'default': 'youtube-dl'},
'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'},
'MERCURY_BINARY': {'type': str, 'default': lambda c: bin_path('postlight-parser')},
'YOUTUBEDL_BINARY': {'type': str, 'default': 'yt-dlp'}, # also can accept youtube-dl
'NODE_BINARY': {'type': str, 'default': 'node'},
'RIPGREP_BINARY': {'type': str, 'default': 'rg'},
'CHROME_BINARY': {'type': str, 'default': None},
@ -439,7 +440,7 @@ DYNAMIC_CONFIG_SCHEMA: ConfigDefaultDict = {
'READABILITY_VERSION': {'default': lambda c: bin_version(c['READABILITY_BINARY']) if c['USE_READABILITY'] else None},
'USE_MERCURY': {'default': lambda c: c['USE_MERCURY'] and c['SAVE_MERCURY']},
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury is unversioned
'MERCURY_VERSION': {'default': lambda c: '1.0.0' if shutil.which(str(bin_path(c['MERCURY_BINARY']))) else None}, # mercury doesnt expose version info until this is merged https://github.com/postlight/parser/pull/750
'USE_GIT': {'default': lambda c: c['USE_GIT'] and c['SAVE_GIT']},
'GIT_VERSION': {'default': lambda c: bin_version(c['GIT_BINARY']) if c['USE_GIT'] else None},

View file

@ -6,9 +6,6 @@ import re
import logging
import tempfile
import ldap
from django_auth_ldap.config import LDAPSearch
from pathlib import Path
from django.utils.crypto import get_random_string
@ -97,33 +94,43 @@ AUTHENTICATION_BACKENDS = [
]
if LDAP:
global AUTH_LDAP_SERVER_URI
AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
try:
import ldap
from django_auth_ldap.config import LDAPSearch
global AUTH_LDAP_BIND_DN
AUTH_LDAP_BIND_DN = LDAP_BIND_DN
global AUTH_LDAP_SERVER_URI
global AUTH_LDAP_BIND_DN
global AUTH_LDAP_BIND_PASSWORD
global AUTH_LDAP_USER_SEARCH
global AUTH_LDAP_USER_ATTR_MAP
global AUTH_LDAP_BIND_PASSWORD
AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
AUTH_LDAP_BIND_DN = LDAP_BIND_DN
AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
global AUTH_LDAP_USER_SEARCH
AUTH_LDAP_USER_SEARCH = LDAPSearch(
LDAP_USER_BASE,
ldap.SCOPE_SUBTREE,
'(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
)
assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
global AUTH_LDAP_USER_ATTR_MAP
AUTH_LDAP_USER_ATTR_MAP = {
'username': LDAP_USERNAME_ATTR,
'first_name': LDAP_FIRSTNAME_ATTR,
'last_name': LDAP_LASTNAME_ATTR,
'email': LDAP_EMAIL_ATTR,
}
AUTH_LDAP_USER_SEARCH = LDAPSearch(
LDAP_USER_BASE,
ldap.SCOPE_SUBTREE,
'(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
)
AUTH_LDAP_USER_ATTR_MAP = {
'username': LDAP_USERNAME_ATTR,
'first_name': LDAP_FIRSTNAME_ATTR,
'last_name': LDAP_LASTNAME_ATTR,
'email': LDAP_EMAIL_ATTR,
}
AUTHENTICATION_BACKENDS = [
'django_auth_ldap.backend.LDAPBackend',
]
except ModuleNotFoundError:
sys.stderr.write('[X] Error: Found LDAP=True config but LDAP packages not installed. You may need to run: pip install archivebox[ldap]\n\n')
# dont hard exit here. in case the user is just running "archivebox version" or "archivebox help", we still want those to work despite broken ldap
# sys.exit(1)
AUTHENTICATION_BACKENDS = [
'django_auth_ldap.backend.LDAPBackend',
]
################################################################################
### Debug Settings

View file

@ -71,7 +71,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
result = run(cmd, cwd=out_dir, timeout=timeout)
try:
result_json = json.loads(result.stdout)
assert result_json and 'content' in result_json
assert result_json and 'content' in result_json, 'Readability output is not valid JSON'
except json.JSONDecodeError:
raise ArchiveError('Readability was not able to archive the page', result.stdout + result.stderr)
@ -85,7 +85,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
# "Downloaded: 76 files, 4.0M in 1.6s (2.52 MB/s)"
output_tail = [
line.strip()
for line in (result.stdout + result.stderr).decode().rsplit('\n', 3)[-3:]
for line in (result.stdout + result.stderr).decode().rsplit('\n', 5)[-5:]
if line.strip()
]
hints = (

View file

@ -533,11 +533,27 @@ def log_shell_welcome_msg():
### Helpers
@enforce_types
def pretty_path(path: Union[Path, str]) -> str:
def pretty_path(path: Union[Path, str], pwd: Union[Path, str]=OUTPUT_DIR) -> str:
"""convert paths like .../ArchiveBox/archivebox/../output/abc into output/abc"""
pwd = Path('.').resolve()
# parent = os.path.abspath(os.path.join(pwd, os.path.pardir))
return str(path).replace(str(pwd) + '/', './')
pwd = str(Path(pwd)) # .resolve()
path = str(path)
if not path:
return path
# replace long absolute paths with ./ relative ones to save on terminal output width
if path.startswith(pwd) and (pwd != '/'):
path = path.replace(pwd, '.', 1)
# quote paths containing spaces
if ' ' in path:
path = f'"{path}"'
# if path is just a plain dot, replace it back with the absolute path for clarity
if path == '.':
path = pwd
return path
@enforce_types
@ -578,6 +594,7 @@ def printable_folder_status(name: str, folder: Dict) -> str:
else:
color, symbol, note, num_files = 'lightyellow', '-', 'disabled', '-'
if folder['path']:
if Path(folder['path']).exists():
num_files = (
@ -592,13 +609,7 @@ def printable_folder_status(name: str, folder: Dict) -> str:
# add symbol @ next to filecount if path is a remote filesystem mount
num_files = f'{num_files} @' if num_files else '@'
path = str(folder['path']).replace(str(OUTPUT_DIR), '.') if folder['path'] else ''
if path and ' ' in path:
path = f'"{path}"'
# if path is just a plain dot, replace it back with the full path for clarity
if path == '.':
path = str(OUTPUT_DIR)
path = pretty_path(folder['path'])
return ' '.join((
ANSI[color],
@ -629,9 +640,7 @@ def printable_dependency_version(name: str, dependency: Dict) -> str:
else:
color, symbol, note, version = 'lightyellow', '-', 'disabled', '-'
path = str(dependency["path"]).replace(str(OUTPUT_DIR), '.') if dependency["path"] else ''
if path and ' ' in path:
path = f'"{path}"'
path = pretty_path(dependency['path'])
return ' '.join((
ANSI[color],

View file

@ -112,6 +112,8 @@ from .config import (
load_all_config,
CONFIG,
USER_CONFIG,
ADMIN_USERNAME,
ADMIN_PASSWORD,
get_real_name,
setup_django,
)
@ -216,7 +218,7 @@ def version(quiet: bool=False,
if not quiet:
# 0.6.3
# ArchiveBox v0.6.3 Cpython Linux Linux-4.19.121-linuxkit-x86_64-with-glibc2.28 x86_64 (in Docker) (in TTY)
# DEBUG=False IN_DOCKER=True IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 501:20 SEARCH_BACKEND=ripgrep
# DEBUG=False IN_DOCKER=True IS_TTY=True TZ=UTC FS_ATOMIC=True FS_REMOTE=False FS_PERMS=644 FS_USER=501:20 SEARCH_BACKEND=ripgrep
p = platform.uname()
print(
@ -236,7 +238,8 @@ def version(quiet: bool=False,
#f'DB=django.db.backends.sqlite3 (({CONFIG["SQLITE_JOURNAL_MODE"]})', # add this if we have more useful info to show eventually
f'FS_ATOMIC={ENFORCE_ATOMIC_WRITES}',
f'FS_REMOTE={OUTPUT_IS_REMOTE_FS}',
f'FS_PERMS={OUTPUT_PERMISSIONS} {PUID}:{PGID}',
f'FS_USER={PUID}:{PGID}',
f'FS_PERMS={OUTPUT_PERMISSIONS}',
f'SEARCH_BACKEND={SEARCH_BACKEND_ENGINE}',
)
print()
@ -251,19 +254,19 @@ def version(quiet: bool=False,
print()
print('{white}[i] Source-code locations:{reset}'.format(**ANSI))
for name, folder in CODE_LOCATIONS.items():
print(printable_folder_status(name, folder))
for name, path in CODE_LOCATIONS.items():
print(printable_folder_status(name, path))
print()
print('{white}[i] Secrets locations:{reset}'.format(**ANSI))
for name, folder in EXTERNAL_LOCATIONS.items():
print(printable_folder_status(name, folder))
for name, path in EXTERNAL_LOCATIONS.items():
print(printable_folder_status(name, path))
print()
if DATA_LOCATIONS['OUTPUT_DIR']['is_valid']:
print('{white}[i] Data locations:{reset}'.format(**ANSI))
for name, folder in DATA_LOCATIONS.items():
print(printable_folder_status(name, folder))
for name, path in DATA_LOCATIONS.items():
print(printable_folder_status(name, path))
else:
print()
print('{white}[i] Data locations:{reset}'.format(**ANSI))
@ -419,14 +422,16 @@ def init(force: bool=False, quick: bool=False, setup: bool=False, out_dir: Path=
write_main_index(list(pending_links.values()), out_dir=out_dir)
print('\n{green}----------------------------------------------------------------------{reset}'.format(**ANSI))
from django.contrib.auth.models import User
if (ADMIN_USERNAME and ADMIN_PASSWORD) and not User.objects.filter(username=ADMIN_USERNAME).exists():
print('{green}[+] Found ADMIN_USERNAME and ADMIN_PASSWORD configuration options, creating new admin user.{reset}'.format(**ANSI))
User.objects.create_superuser(username=ADMIN_USERNAME, password=ADMIN_PASSWORD)
if existing_index:
print('{green}[√] Done. Verified and updated the existing ArchiveBox collection.{reset}'.format(**ANSI))
else:
# TODO: allow creating new supersuer via env vars on first init
# if config.HTTP_USER and config.HTTP_PASS:
# from django.contrib.auth.models import User
# User.objects.create_superuser(HTTP_USER, '', HTTP_PASS)
print('{green}[√] Done. A new ArchiveBox collection was initialized ({} links).{reset}'.format(len(all_links) + len(pending_links), **ANSI))
json_index = out_dir / JSON_INDEX_FILENAME

View file

@ -1,62 +1,3 @@
{% extends "base.html" %}
{% load static %}
{% block body %}
<div id="toolbar">
<form id="changelist-search" action="{% url 'public-index' %}" method="get">
<div>
<label for="searchbar"><img src="/static/admin/img/search.svg" alt="Search"></label>
<input type="text" size="40" name="q" value="" id="searchbar" autofocus placeholder="Title, URL, tags, timestamp, or content...".>
<input type="submit" value="Search" style="height: 36px; padding-top: 6px; margin: 8px"/>
<input type="button"
value="♺"
title="Refresh..."
onclick="location.href='{% url 'public-index' %}'"
style="background-color: rgba(121, 174, 200, 0.8); height: 30px; font-size: 0.8em; margin-top: 12px; padding-top: 6px; float:right">
</input>
</div>
</form>
</div>
<table id="table-bookmarks">
<thead>
<tr>
<th style="width: 100px;">Bookmarked</th>
<th style="width: 26vw;">Snapshot ({{object_list|length}})</th>
<th style="width: 140px">Files</th>
<th style="width: 16vw;whitespace:nowrap;overflow-x:hidden;">Original URL</th>
</tr>
</thead>
<tbody>
{% for link in object_list %}
{% include 'main_index_row.html' with link=link %}
{% endfor %}
</tbody>
</table>
<center>
<span class="step-links">
{% if page_obj.has_previous %}
<a href="{% url 'public-index' %}?page=1">&laquo; first</a>
<a href="{% url 'public-index' %}?page={{ page_obj.previous_page_number }}">previous</a>
{% endif %}
<span class="current">
Page {{ page_obj.number }} of {{ page_obj.paginator.num_pages }}.
</span>
{% if page_obj.has_next %}
<a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
<a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last &raquo;</a>
{% endif %}
</span>
{% if page_obj.has_next %}
<a href="{% url 'public-index' %}?page={{ page_obj.next_page_number }}">next </a>
<a href="{% url 'public-index' %}?page={{ page_obj.paginator.num_pages }}">last &raquo;</a>
{% endif %}
</span>
<br>
</center>
{% endblock %}
{% extends "admin/base_site.html" %}
{% load i18n admin_urls static admin_list %}
{% load core_tags %}

View file

@ -33,7 +33,7 @@
<br/>
<div class="loader"></div>
<br/>
Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for progress...
Check the server log or the <a href="/admin/core/archiveresult/?o=-1">Log</a> page for detailed progress...
</center>
</div>
<form id="add-form" method="POST" class="p-form">{% csrf_token %}
@ -46,19 +46,22 @@
</form>
<br/><br/><br/>
<center id="delay-warning" style="display: none">
<small>(it's safe to leave this page, adding will continue in the background)</small>
<small>(you will be redirected to your <a href="/">Snapshot list</a> momentarily, its safe to close this page at any time)</small>
</center>
{% if absolute_add_path %}
<center id="bookmarklet">
<!-- <center id="bookmarklet">
<p>Bookmark this link to quickly add to your archive:
<a href="javascript:void(window.open('{{ absolute_add_path }}?url='+encodeURIComponent(document.location.href)));">Add to ArchiveBox</a></p>
</center>
</center> -->
{% endif %}
<script>
document.getElementById('add-form').addEventListener('submit', function(event) {
document.getElementById('in-progress').style.display = 'block'
document.getElementById('add-form').style.display = 'none'
document.getElementById('delay-warning').style.display = 'block'
setTimeout(function() {
window.location = '/'
}, 2000)
return true
})
</script>

View file

@ -65,8 +65,9 @@ check_platforms || (recreate_builder && check_platforms) || exit 1
echo "[+] Building archivebox:$VERSION docker image..."
#docker build . \
docker buildx build --platform "$REQUIRED_PLATFORMS" --push . \
# docker builder prune
# docker build . --no-cache -t archivebox-dev \
docker buildx build --platform "$REQUIRED_PLATFORMS" --load . \
-t archivebox \
-t archivebox:$TAG_NAME \
-t archivebox:$VERSION \

View file

@ -25,7 +25,10 @@ cd "$REPO_DIR"
rm -Rf build dist
echo "[+] Building sdist, bdist_wheel, and egg_info"
python3 setup.py \
sdist --dist-dir=./pip_dist \
bdist_wheel --dist-dir=./pip_dist \
egg_info --egg-base=./pip_dist
# python3 setup.py \
# sdist --dist-dir=./pip_dist \
# bdist_wheel --dist-dir=./pip_dist \
# egg_info --egg-base=./pip_dist
# pip install --upgrade pip setuptools build
python -m build

View file

@ -12,22 +12,26 @@ if [[ -n "$PGID" && "$PGID" != 0 ]]; then
groupmod -g "$PGID" "$ARCHIVEBOX_USER" > /dev/null 2>&1
fi
export PUID="$(id -u archivebox)"
export PGID="$(id -g archivebox)"
# Set the permissions of the data dir to match the archivebox user
# Check the permissions of the data dir (or create if it doesn't exist)
if [[ -d "$DATA_DIR/archive" ]]; then
# check data directory permissions
if [[ ! "$(stat -c %u $DATA_DIR/archive)" = "$(id -u archivebox)" ]]; then
echo "Change in ownership detected, please be patient while we chown existing files"
echo "This could take some time..."
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER -R "$DATA_DIR"
if touch "$DATA_DIR/archive/.permissions_test_safe_to_delete"; then
# It's fine, we are able to write to the data directory
rm "$DATA_DIR/archive/.permissions_test_safe_to_delete"
# echo "[√] Permissions are correct"
else
echo "[X] Permissions Error: ArchiveBox is not able to write to your data dir. You need to fix the data dir ownership and retry:" >2
echo " chown -R $PUID:$PGID data" >2
echo " https://docs.linuxserver.io/general/understanding-puid-and-pgid" >2
exit 1
fi
else
# create data directory
mkdir -p "$DATA_DIR/logs"
chown -R $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR"
fi
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR"
chown $ARCHIVEBOX_USER:$ARCHIVEBOX_USER "$DATA_DIR" "$DATA_DIR"/*
# Drop permissions to run commands as the archivebox user
if [[ "$1" == /* || "$1" == "echo" || "$1" == "archivebox" ]]; then

View file

@ -34,6 +34,8 @@ services:
# - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
# - PUID=1000 # set to your host user's UID & GID if you encounter permissions issues
# - PGID=1000
# - ADMIN_USERNAME=admin # create an admin user on first run with the given user/pass combo
# - ADMIN_PASSWORD=SomeSecretPassword
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search
# - SEARCH_BACKEND_HOST_NAME=sonic
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword

1718
package-lock.json generated

File diff suppressed because it is too large Load diff

View file

@ -1,14 +1,13 @@
{
"name": "archivebox",
"version": "0.6.3",
"version": "0.7.0",
"description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
"repository": "github:ArchiveBox/ArchiveBox",
"license": "MIT",
"dependencies": {
"@postlight/mercury-parser": "git+https://github.com/postlight/mercury-parser.git",
"playwright": "^1.37.1",
"@postlight/parser": "^2.2.3",
"readability-extractor": "git+https://github.com/ArchiveBox/readability-extractor.git",
"single-file-cli": "^1.0.63"
"single-file-cli": "^1.1.12"
}
}

2077
pdm.lock Normal file

File diff suppressed because it is too large Load diff

121
pyproject.toml Normal file
View file

@ -0,0 +1,121 @@
[project]
name = "archivebox"
version = "0.7.0"
description = "Self-hosted internet archiving solution."
authors = [
{name = "Nick Sweeting", email = "setup.py@archivebox.io"},
]
dependencies = [
"setuptools>=68.2.2",
"croniter>=0.3.34",
"dateparser>=1.0.0",
"django-extensions>=3.0.3",
"django>=3.1.3,<3.2",
"ipython>5.0.0",
"mypy-extensions>=0.4.3",
"python-crontab>=2.5.1",
"requests>=2.24.0",
"w3lib>=1.22.0",
# "youtube-dl>=2021.04.17",
"yt-dlp>=2021.4.11",
"playwright>=1.39.0",
]
requires-python = ">=3.9"
readme = "README.md"
license = {text = "MIT"}
classifiers = [
"Development Status :: 4 - Beta",
"Environment :: Console",
"Environment :: Web Environment",
"Framework :: Django",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: End Users/Desktop",
"Intended Audience :: Information Technology",
"Intended Audience :: Legal Industry",
"Intended Audience :: System Administrators",
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
"Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
"Topic :: Sociology :: History",
"Topic :: Software Development :: Libraries :: Python Modules",
"Topic :: System :: Archiving",
"Topic :: System :: Archiving :: Backup",
"Topic :: System :: Recovery Tools",
"Topic :: Utilities",
"Typing :: Typed",
]
# pdm lock -G:all
# pdm install -G:all
[tool.pdm.dev-dependencies]
build = [
"pdm",
"bottle",
"setuptools",
"stdeb",
"twine",
"wheel",
]
lint = [
"flake8",
"mypy",
"django-stubs",
]
test = [
"pytest",
]
debug = [
"django-debug-toolbar",
"djdt_flamegraph",
"ipdb",
]
doc = [
"recommonmark",
"sphinx",
"sphinx-rtd-theme",
]
[project.optional-dependencies]
sonic = [
# echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
# curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
"sonic-client>=0.0.5",
]
ldap = [
# apt install libldap2-dev libsasl2-dev
"django-auth-ldap>=4.1.0",
]
[project.scripts]
archivebox = "archivebox.cli:main"
[tool.pdm.scripts]
lint = "./bin/lint.sh"
test = "./bin/test.sh"
# all = {composite = ["lint mypackage/", "test -v tests/"]}
[build-system]
requires = ["pdm-backend"]
build-backend = "pdm.backend"
[project.urls]
Homepage = "https://github.com/ArchiveBox/ArchiveBox"
Source = "https://github.com/ArchiveBox/ArchiveBox"
Documentation = "https://github.com/ArchiveBox/ArchiveBox/wiki"
"Bug Tracker" = "https://github.com/ArchiveBox/ArchiveBox/issues"
Changelog = "https://github.com/ArchiveBox/ArchiveBox/releases"
Roadmap = "https://github.com/ArchiveBox/ArchiveBox/wiki/Roadmap"
Community = "https://github.com/ArchiveBox/ArchiveBox/wiki/Web-Archiving-Community"
Demo = "https://demo.archivebox.io"
Donate = "https://github.com/ArchiveBox/ArchiveBox/wiki/Donations"

266
setup.py
View file

@ -1,146 +1,150 @@
import json
import setuptools
from setuptools.command.test import test
#####################################################################################
# THIS FILE IS DEPRECATED AND WILL BE REMOVED EVENTUALLU
# ALL FUTURE CHANGES SHOULD HAPPEN IN pyproject.toml with pdm
#####################################################################################
from pathlib import Path
# import json
# import setuptools
# from setuptools.command.test import test
# from pathlib import Path
PKG_NAME = "archivebox"
DESCRIPTION = "Self-hosted internet archiving solution."
LICENSE = "MIT"
AUTHOR = "Nick Sweeting"
AUTHOR_EMAIL="git@nicksweeting.com"
REPO_URL = "https://github.com/ArchiveBox/ArchiveBox"
PROJECT_URLS = {
"Source": f"{REPO_URL}",
"Documentation": f"{REPO_URL}/wiki",
"Bug Tracker": f"{REPO_URL}/issues",
"Changelog": f"{REPO_URL}/releases",
"Roadmap": f"{REPO_URL}/wiki/Roadmap",
"Community": f"{REPO_URL}/wiki/Web-Archiving-Community",
"Demo": f"https://demo.archivebox.io",
"Donate": f"{REPO_URL}/wiki/Donations",
}
# PKG_NAME = "archivebox"
# DESCRIPTION = "Self-hosted internet archiving solution."
# LICENSE = "MIT"
# AUTHOR = "Nick Sweeting"
# AUTHOR_EMAIL="setup.py@archivebox.io"
# REPO_URL = "https://github.com/ArchiveBox/ArchiveBox"
# PROJECT_URLS = {
# "Source": f"{REPO_URL}",
# "Documentation": f"{REPO_URL}/wiki",
# "Bug Tracker": f"{REPO_URL}/issues",
# "Changelog": f"{REPO_URL}/releases",
# "Roadmap": f"{REPO_URL}/wiki/Roadmap",
# "Community": f"{REPO_URL}/wiki/Web-Archiving-Community",
# "Demo": f"https://demo.archivebox.io",
# "Donate": f"{REPO_URL}/wiki/Donations",
# }
ROOT_DIR = Path(__file__).parent.resolve()
PACKAGE_DIR = ROOT_DIR / PKG_NAME
# ROOT_DIR = Path(__file__).parent.resolve()
# PACKAGE_DIR = ROOT_DIR / PKG_NAME
README = (PACKAGE_DIR / "README.md").read_text(encoding='utf-8', errors='ignore')
VERSION = json.loads((PACKAGE_DIR / "package.json").read_text().strip())['version']
# README = (PACKAGE_DIR / "README.md").read_text(encoding='utf-8', errors='ignore')
# VERSION = json.loads((PACKAGE_DIR / "package.json").read_text().strip())['version']
PYTHON_REQUIRES = ">=3.7"
SETUP_REQUIRES = ["wheel"]
INSTALL_REQUIRES = [
# only add things here that have corresponding apt python3-packages available
# anything added here also needs to be added to our package dependencies in
# stdeb.cfg (apt), archivebox.rb (brew), Dockerfile, etc.
# if there is no apt python3-package equivalent, then vendor it instead in
# ./archivebox/vendor/
"requests>=2.24.0",
"mypy-extensions>=0.4.3",
"django>=3.1.3,<3.2",
"django-extensions>=3.0.3",
"dateparser>=1.0.0",
"youtube-dl>=2021.04.17",
"yt-dlp>=2021.4.11",
"python-crontab>=2.5.1",
"croniter>=0.3.34",
"w3lib>=1.22.0",
"ipython>5.0.0",
"django-auth-ldap>=4.1.0"
]
EXTRAS_REQUIRE = {
'sonic': [
"sonic-client>=0.0.5",
],
'dev': [
"setuptools",
"twine",
"wheel",
"flake8",
"ipdb",
"mypy",
"django-stubs",
"sphinx",
"sphinx-rtd-theme",
"recommonmark",
"pytest",
"bottle",
"stdeb",
"django-debug-toolbar",
"djdt_flamegraph",
],
}
# class DisabledTestCommand(test):
# def run(self):
# # setup.py test is deprecated, disable it here by force so stdeb doesnt run it
# print('\n[X] Running tests via setup.py test is deprecated.')
# print(' Hint: Use the ./bin/test.sh script or pytest instead')
# To see when setup.py gets called (uncomment for debugging):
# import sys
# print(PACKAGE_DIR, f" (v{VERSION})")
# print('>', sys.executable, *sys.argv)
# PYTHON_REQUIRES = ">=3.9"
# SETUP_REQUIRES = ["wheel"]
# INSTALL_REQUIRES = [
# # only add things here that have corresponding apt python3-packages available
# # anything added here also needs to be added to our package dependencies in
# # stdeb.cfg (apt), archivebox.rb (brew), Dockerfile, etc.
# # if there is no apt python3-package equivalent, then vendor it instead in
# # ./archivebox/vendor/
# "requests>=2.24.0",
# "mypy-extensions>=0.4.3",
# "django>=3.1.3,<3.2",
# "django-extensions>=3.0.3",
# "dateparser>=1.0.0",
# "youtube-dl>=2021.04.17",
# "yt-dlp>=2021.4.11",
# "python-crontab>=2.5.1",
# "croniter>=0.3.34",
# "w3lib>=1.22.0",
# "ipython>5.0.0",
# ]
# EXTRAS_REQUIRE = {
# 'sonic': [
# "sonic-client>=0.0.5",
# ],
# 'ldap': [
# "django-auth-ldap>=4.1.0",
# ],
# 'dev': [
# "setuptools",
# "twine",
# "wheel",
# "flake8",
# "ipdb",
# "mypy",
# "django-stubs",
# "sphinx",
# "sphinx-rtd-theme",
# "recommonmark",
# "pytest",
# "bottle",
# "stdeb",
# "django-debug-toolbar",
# "djdt_flamegraph",
# ],
# }
#
# setuptools.setup(
# name=PKG_NAME,
# version=VERSION,
# license=LICENSE,
# author=AUTHOR,
# author_email=AUTHOR_EMAIL,
# description=DESCRIPTION,
# long_description=README,
# long_description_content_type="text/markdown",
# url=REPO_URL,
# project_urls=PROJECT_URLS,
# python_requires=PYTHON_REQUIRES,
# setup_requires=SETUP_REQUIRES,
# install_requires=INSTALL_REQUIRES,
# extras_require=EXTRAS_REQUIRE,
# packages=[PKG_NAME],
# include_package_data=True, # see MANIFEST.in
# entry_points={
# "console_scripts": [
# f"{PKG_NAME} = {PKG_NAME}.cli:main",
# ],
# },
# classifiers=[
# "License :: OSI Approved :: MIT License",
# "Natural Language :: English",
# "Operating System :: OS Independent",
# "Development Status :: 4 - Beta",
class DisabledTestCommand(test):
def run(self):
# setup.py test is deprecated, disable it here by force so stdeb doesnt run it
print()
print('[X] Running tests via setup.py test is deprecated.')
print(' Hint: Use the ./bin/test.sh script or pytest instead')
# "Topic :: Utilities",
# "Topic :: System :: Archiving",
# "Topic :: System :: Archiving :: Backup",
# "Topic :: System :: Recovery Tools",
# "Topic :: Sociology :: History",
# "Topic :: Internet :: WWW/HTTP",
# "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
# "Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
# "Topic :: Software Development :: Libraries :: Python Modules",
setuptools.setup(
name=PKG_NAME,
version=VERSION,
license=LICENSE,
author=AUTHOR,
author_email=AUTHOR_EMAIL,
description=DESCRIPTION,
long_description=README,
long_description_content_type="text/markdown",
url=REPO_URL,
project_urls=PROJECT_URLS,
python_requires=PYTHON_REQUIRES,
setup_requires=SETUP_REQUIRES,
install_requires=INSTALL_REQUIRES,
extras_require=EXTRAS_REQUIRE,
packages=[PKG_NAME],
include_package_data=True, # see MANIFEST.in
entry_points={
"console_scripts": [
f"{PKG_NAME} = {PKG_NAME}.cli:main",
],
},
classifiers=[
"License :: OSI Approved :: MIT License",
"Natural Language :: English",
"Operating System :: OS Independent",
"Development Status :: 4 - Beta",
"Topic :: Utilities",
"Topic :: System :: Archiving",
"Topic :: System :: Archiving :: Backup",
"Topic :: System :: Recovery Tools",
"Topic :: Sociology :: History",
"Topic :: Internet :: WWW/HTTP",
"Topic :: Internet :: WWW/HTTP :: Indexing/Search",
"Topic :: Internet :: WWW/HTTP :: WSGI :: Application",
"Topic :: Software Development :: Libraries :: Python Modules",
"Intended Audience :: Developers",
"Intended Audience :: Education",
"Intended Audience :: End Users/Desktop",
"Intended Audience :: Information Technology",
"Intended Audience :: Legal Industry",
"Intended Audience :: System Administrators",
# "Intended Audience :: Developers",
# "Intended Audience :: Education",
# "Intended Audience :: End Users/Desktop",
# "Intended Audience :: Information Technology",
# "Intended Audience :: Legal Industry",
# "Intended Audience :: System Administrators",
"Environment :: Console",
"Environment :: Web Environment",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Framework :: Django",
"Typing :: Typed",
],
cmdclass={
"test": DisabledTestCommand,
},
)
# "Environment :: Console",
# "Environment :: Web Environment",
# "Programming Language :: Python :: 3",
# "Programming Language :: Python :: 3.7",
# "Programming Language :: Python :: 3.8",
# "Programming Language :: Python :: 3.9",
# "Framework :: Django",
# "Typing :: Typed",
# ],
# cmdclass={
# "test": DisabledTestCommand,
# },
# )

View file

@ -6,6 +6,6 @@ Suite: focal
Suite3: focal
Build-Depends: debhelper, dh-python, python3-all, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
Depends3: nodejs, wget, curl, git, ffmpeg, youtube-dl, yt-dlp, python3-all, python3-pip, python3-setuptools, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep
X-Python3-Version: >= 3.7
XS-Python-Version: >= 3.7
X-Python3-Version: >= 3.9
XS-Python-Version: >= 3.9
Setup-Env-Vars: DEB_BUILD_OPTIONS=nocheck