Merge pull request #652 from ArchiveBox/dev

This commit is contained in:
Nick Sweeting 2021-02-09 16:24:30 +02:00 committed by GitHub
commit 9766ea21a7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 99 additions and 46 deletions

View file

@ -75,7 +75,7 @@ jobs:
tags: ${{ steps.docker_meta.outputs.tags }}
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
platforms: linux/amd64,linux/386,linux/arm64,linux/arm/v7
platforms: linux/amd64,linux/arm64,linux/arm/v7
- name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}

View file

@ -63,6 +63,7 @@ RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -
&& apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
nodejs \
# && npm install -g npm \
&& rm -rf /var/lib/apt/lists/*
# Install Node dependencies
@ -82,6 +83,7 @@ ADD ./pip_dist/archivebox.egg-info/requires.txt "$CODE_DIR/pip_dist/archivebox.e
RUN apt-get update -qq \
&& apt-get install -qq -y --no-install-recommends \
build-essential python-dev python3-dev \
# && pip install --upgrade pip \
&& grep -B 1000 -E '^$' "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
&& pip install --quiet "sonic-client==0.0.5" \
&& apt-get purge -y build-essential python-dev python3-dev \

View file

@ -2,3 +2,5 @@ graft archivebox
global-exclude .DS_Store
global-exclude __pycache__
global-exclude *.pyc
prune tests/

View file

@ -82,8 +82,9 @@ archivebox help
<sub>. . . . . . . . . . . . . . . . . . . . . . . . . . . .</sub>
<br/><br/>
<img src="https://i.imgur.com/njxgSbl.png" width="22%" alt="cli init screenshot" align="top">
<img src="https://i.imgur.com/lUuicew.png" width="22%" alt="cli init screenshot" align="top">
<img src="https://i.imgur.com/p6wK6KM.png" width="22%" alt="server snapshot admin screenshot" align="top">
<img src="https://i.imgur.com/RefWsXB.jpg" width="28.6%" alt="server snapshot details page screenshot" align="top"/>
<img src="https://i.imgur.com/xHvQfon.png" width="28.6%" alt="server snapshot details page screenshot" align="top"/>
<br/>
<br/>
<img src="https://i.imgur.com/T2UAGUD.png" width="49%" alt="grass"/><img src="https://i.imgur.com/T2UAGUD.png" width="49%" alt="grass"/>
@ -266,10 +267,7 @@ No matter which install method you choose, they all roughly follow this 3-step p
<br/>
<div align="center">
<img src="https://i.imgur.com/lUuicew.png" width="22.4%" align="top">
<img src="https://i.imgur.com/p6wK6KM.png" width="35.9%" align="top">
<img src="https://i.imgur.com/pzq4uXq.png" width="29.7%" align="top">
<br/><br/>
<br/>
<sub>. . . . . . . . . . . . . . . . . . . . . . . . . . . .</sub>
<br/><br/>
<a href="https://archivebox.zervice.io">DEMO: <code>https://archivebox.zervice.io</code></a><br/>
@ -327,6 +325,14 @@ All of ArchiveBox's state (including the index, snapshot data, and config file)
The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard sqlite3 database (it can also be exported as static JSON/HTML), and the archive snapshots are organized by date-added timestamp in the `archive/` subfolder. Each snapshot subfolder includes a static JSON and HTML index describing its contents, and the snapshot extractor outputs are plain files within the folder (e.g. `media/example.mp4`, `git/somerepo.git`, `static/someimage.png`, etc.)
```bash
# to browse your index statically without running the archivebox server, run:
archivebox list --html --with-headers > index.html
archivebox list --json --with-headers > index.json
# then open the static index in a browser
open index.html
# or browse the snapshots via filesystem directly
ls ./archive/<timestamp>/
```
@ -346,6 +352,12 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te
It does everything out-of-the-box by default, but you can disable or tweak [individual archive methods](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) via environment variables or config file.
```bash
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False
archivebox config --set YOUTUBEDL_ARGS='--max-filesize=500m'
archivebox config --help
```
<div align="center">
<img src="https://i.imgur.com/ucyimDX.png" width="96%" alt="lego graphic">
</div>
@ -445,7 +457,7 @@ archivebox add 'https://example.com#2020-10-25'
<img src="https://i.imgur.com/p6wK6KM.png" alt="archivebox server list">
</td>
<td>
<img src="https://i.imgur.com/pzq4uXq.png" alt="archivebox server detail">
<img src="https://i.imgur.com/xHvQfon.png" alt="archivebox server detail">
</td>
</tr>
</tbody>

View file

@ -1079,6 +1079,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
if check_db:
sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
assert sql_index_path.exists(), (
f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
except KeyboardInterrupt:
raise SystemExit(2)

View file

@ -36,8 +36,25 @@ def forwards_func(apps, schema_editor):
for extractor in history:
for result in history[extractor]:
ArchiveResult.objects.create(extractor=extractor, snapshot=snapshot, cmd=result["cmd"], cmd_version=result["cmd_version"] or 'unknown',
start_ts=result["start_ts"], end_ts=result["end_ts"], status=result["status"], pwd=result["pwd"], output=result["output"])
try:
ArchiveResult.objects.create(
extractor=extractor,
snapshot=snapshot,
pwd=result["pwd"],
cmd=result.get("cmd") or [],
cmd_version=result.get("cmd_version") or 'unknown',
start_ts=result["start_ts"],
end_ts=result["end_ts"],
status=result["status"],
output=result.get("output") or 'null',
)
except Exception as e:
print(
' ! Skipping import due to missing/invalid index.json:',
out_dir,
e,
'(open an issue with this index.json for help)',
)
def verify_json_index_integrity(snapshot):

View file

@ -55,11 +55,11 @@ class ArchiveResult:
assert isinstance(self.end_ts, datetime)
assert isinstance(self.cmd, list)
assert all(isinstance(arg, str) and arg for arg in self.cmd)
assert self.pwd is None or isinstance(self.pwd, str) and self.pwd
assert self.cmd_version is None or isinstance(self.cmd_version, str) and self.cmd_version
# TODO: replace emptystrings in these three with None / remove them from the DB
assert self.pwd is None or isinstance(self.pwd, str)
assert self.cmd_version is None or isinstance(self.cmd_version, str)
assert self.output is None or isinstance(self.output, (str, Exception))
if isinstance(self.output, str):
assert self.output
@classmethod
def guess_ts(_cls, dict_info):

View file

@ -10,14 +10,6 @@ set -o nounset
set -o pipefail
IFS=$'\n'
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
# Activate the repo-local virtualenv if one exists; otherwise only print a
# warning and continue with whatever environment is already active.
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
    source "$REPO_DIR/.venv/bin/activate"
else
    # Report the same path that was checked above (note the "/" before .venv).
    echo "[!] Warning: No virtualenv present in $REPO_DIR/.venv"
fi
cd "$REPO_DIR"
CURRENT_PLAFORM="$(uname)"
REQUIRED_PLATFORM="Linux"
@ -26,30 +18,27 @@ if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
exit 0
fi
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
DEBIAN_VERSION="1"
PGP_KEY_ID="7D5695D3B618872647861D51C38137A7C1675988"
# make sure you have this in ~/.dput.cf:
# [archivebox-ppa]
# fqdn: ppa.launchpad.net
# method: ftp
# incoming: ~archivebox/ubuntu/archivebox/
# login: anonymous
# allow_unsigned_uploads: 0
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
DEBIAN_VERSION="${DEBIAN_VERSION:-1}"
cd "$REPO_DIR"
# Activate the repo-local virtualenv if one exists; otherwise only print a
# warning and continue with whatever environment is already active.
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
    source "$REPO_DIR/.venv/bin/activate"
else
    # Report the same path that was checked above (note the "/" before .venv).
    echo "[!] Warning: No virtualenv present in $REPO_DIR/.venv"
fi
# cleanup build artifacts
rm -Rf build deb_dist dist archivebox-*.tar.gz
# make sure the stdeb.cfg file is up-to-date with all the dependencies
# build source and binary packages
# make sure the stdeb.cfg file is up-to-date with all the dependencies
python3 setup.py --command-packages=stdeb.command \
sdist_dsc --debian-version=$DEBIAN_VERSION \
bdist_deb
# sign the build with your PGP key ID
debsign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
# push the build to launchpad ppa
# dput archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
# should output deb_dist/archivebox_0.5.4-1.{deb,changes,buildinfo,tar.gz}

View file

@ -10,11 +10,41 @@ set -o nounset
set -o pipefail
IFS=$'\n'
CURRENT_PLAFORM="$(uname)"
REQUIRED_PLATFORM="Linux"
if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
echo "[!] Skipping the Debian package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)."
exit 0
fi
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
DEBIAN_VERSION="${DEBIAN_VERSION:-1}"
cd "$REPO_DIR"
echo "[+] Loading PGP keys from env vars and filesystem..."
# https://github.com/ArchiveBox/debian-archivebox/settings/secrets/actions
PGP_KEY_ID="${PGP_KEY_ID:-BC2D21B0D84E16C437300B8652423FBED1586F45}"
[[ "${PGP_PUBLIC_KEY:-}" ]] && echo "$PGP_PUBLIC_KEY" > /tmp/archivebox_gpg.key.pub
[[ "${PGP_PRIVATE_KEY:-}" ]] && echo "$PGP_PRIVATE_KEY" > /tmp/archivebox_gpg.key
gpg --import /tmp/archivebox_gpg.key.pub || true
gpg --import --allow-secret-key-import /tmp/archivebox_gpg.key || true
echo "$PGP_KEY_ID:6:" | gpg --import-ownertrust || true
echo "[*] Signing build and changelog with PGP..."
debsign --re-sign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
# make sure you have this in ~/.dput.cf:
# [archivebox-ppa]
# fqdn: ppa.launchpad.net
# method: ftp
# incoming: ~archivebox/ubuntu/archivebox/
# login: anonymous
# allow_unsigned_uploads: 0
echo "[^] Uploading to launchpad.net"
dput archivebox "deb_dist/archivebox_${VERSION}-1_source.changes"
dput -f archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"

View file

@ -19,6 +19,7 @@ cd "$REPO_DIR"
echo "[^] Uploading docker image"
# docker login --username=nikisweeting
# docker login docker.pkg.github.com --username=pirate
docker push archivebox/archivebox:$VERSION archivebox/archivebox:$SHORT_VERSION archivebox/archivebox:latest
docker push docker.io/nikisweeting/archivebox
docker push docker.io/archivebox/archivebox
docker push docker.pkg.github.com/archivebox/archivebox/archivebox

View file

@ -1,6 +1,6 @@
{
"name": "archivebox",
"version": "0.5.4",
"version": "0.5.6",
"description": "ArchiveBox: The self-hosted internet archive",
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
"license": "MIT",

View file

@ -33,11 +33,10 @@ VERSION = json.loads((PACKAGE_DIR / "package.json").read_text().strip())['versio
# print('>', sys.executable, *sys.argv)
class CustomTest(test):
class DisabledTestCommand(test):
def run(self):
# setup.py test is deprecated, disable it here by force so stdeb doesn't run it
#super().run()
pass
print('Use the ./bin/test.sh script to run tests, not setup.py test.')
setuptools.setup(
@ -129,6 +128,6 @@ setuptools.setup(
"Typing :: Typed",
],
cmdclass={
"test": CustomTest,
"test": DisabledTestCommand,
},
)

View file

@ -7,3 +7,4 @@ Suite3: focal
Build-Depends: dh-python, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
Depends3: nodejs, chromium-browser, wget, curl, git, ffmpeg, youtube-dl, python3-atomicwrites, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep
XS-Python-Version: >= 3.7
Setup-Env-Vars: DEB_BUILD_OPTIONS=nocheck