mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-23 04:33:11 +00:00
Merge pull request #652 from ArchiveBox/dev
This commit is contained in:
commit
9766ea21a7
13 changed files with 99 additions and 46 deletions
2
.github/workflows/docker.yml
vendored
2
.github/workflows/docker.yml
vendored
|
@ -75,7 +75,7 @@ jobs:
|
|||
tags: ${{ steps.docker_meta.outputs.tags }}
|
||||
cache-from: type=local,src=/tmp/.buildx-cache
|
||||
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||
platforms: linux/amd64,linux/386,linux/arm64,linux/arm/v7
|
||||
platforms: linux/amd64,linux/arm64,linux/arm/v7
|
||||
|
||||
- name: Image digest
|
||||
run: echo ${{ steps.docker_build.outputs.digest }}
|
||||
|
|
|
@ -63,6 +63,7 @@ RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add -
|
|||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
nodejs \
|
||||
# && npm install -g npm \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Node dependencies
|
||||
|
@ -82,6 +83,7 @@ ADD ./pip_dist/archivebox.egg-info/requires.txt "$CODE_DIR/pip_dist/archivebox.e
|
|||
RUN apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
build-essential python-dev python3-dev \
|
||||
# && pip install --upgrade pip \
|
||||
&& grep -B 1000 -E '^$' "$CODE_DIR/pip_dist/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
|
||||
&& pip install --quiet "sonic-client==0.0.5" \
|
||||
&& apt-get purge -y build-essential python-dev python3-dev \
|
||||
|
|
|
@ -2,3 +2,5 @@ graft archivebox
|
|||
global-exclude .DS_Store
|
||||
global-exclude __pycache__
|
||||
global-exclude *.pyc
|
||||
|
||||
prune tests/
|
||||
|
|
26
README.md
26
README.md
|
@ -82,8 +82,9 @@ archivebox help
|
|||
<sub>. . . . . . . . . . . . . . . . . . . . . . . . . . . .</sub>
|
||||
<br/><br/>
|
||||
<img src="https://i.imgur.com/njxgSbl.png" width="22%" alt="cli init screenshot" align="top">
|
||||
<img src="https://i.imgur.com/lUuicew.png" width="22%" alt="cli init screenshot" align="top">
|
||||
<img src="https://i.imgur.com/p6wK6KM.png" width="22%" alt="server snapshot admin screenshot" align="top">
|
||||
<img src="https://i.imgur.com/RefWsXB.jpg" width="28.6%" alt="server snapshot details page screenshot" align="top"/>
|
||||
<img src="https://i.imgur.com/xHvQfon.png" width="28.6%" alt="server snapshot details page screenshot" align="top"/>
|
||||
<br/>
|
||||
<br/>
|
||||
<img src="https://i.imgur.com/T2UAGUD.png" width="49%" alt="grass"/><img src="https://i.imgur.com/T2UAGUD.png" width="49%" alt="grass"/>
|
||||
|
@ -266,10 +267,7 @@ No matter which install method you choose, they all roughly follow this 3-step p
|
|||
<br/>
|
||||
|
||||
<div align="center">
|
||||
<img src="https://i.imgur.com/lUuicew.png" width="22.4%" align="top">
|
||||
<img src="https://i.imgur.com/p6wK6KM.png" width="35.9%" align="top">
|
||||
<img src="https://i.imgur.com/pzq4uXq.png" width="29.7%" align="top">
|
||||
<br/><br/>
|
||||
<br/>
|
||||
<sub>. . . . . . . . . . . . . . . . . . . . . . . . . . . .</sub>
|
||||
<br/><br/>
|
||||
<a href="https://archivebox.zervice.io">DEMO: <code>https://archivebox.zervice.io</code></a><br/>
|
||||
|
@ -327,7 +325,15 @@ All of ArchiveBox's state (including the index, snapshot data, and config file)
|
|||
The on-disk layout is optimized to be easy to browse by hand and durable long-term. The main index is a standard sqlite3 database (it can also be exported as static JSON/HTML), and the archive snapshots are organized by date-added timestamp in the `archive/` subfolder. Each snapshot subfolder includes a static JSON and HTML index describing its contents, and the snapshot extractor outputs are plain files within the folder (e.g. `media/example.mp4`, `git/somerepo.git`, `static/someimage.png`, etc.)
|
||||
|
||||
```bash
|
||||
ls ./archive/<timestamp>/
|
||||
# to browse your index statically without running the archivebox server, run:
|
||||
archivebox list --html --with-headers > index.html
|
||||
archivebox list --json --with-headers > index.json
|
||||
|
||||
# then open the static index in a browser
|
||||
open index.html
|
||||
|
||||
# or browse the snapshots via filesystem directly
|
||||
ls ./archive/<timestamp>/
|
||||
```
|
||||
|
||||
- **Index:** `index.html` & `index.json` HTML and JSON index files containing metadata and details
|
||||
|
@ -346,6 +352,12 @@ The on-disk layout is optimized to be easy to browse by hand and durable long-te
|
|||
|
||||
It does everything out-of-the-box by default, but you can disable or tweak [individual archive methods](https://github.com/ArchiveBox/ArchiveBox/wiki/Configuration) via environment variables or config file.
|
||||
|
||||
```bash
|
||||
archivebox config --set SAVE_ARCHIVE_DOT_ORG=False
|
||||
archivebox config --set YOUTUBEDL_ARGS='--max-filesize=500m'
|
||||
archivebox config --help
|
||||
```
|
||||
|
||||
<div align="center">
|
||||
<img src="https://i.imgur.com/ucyimDX.png" width="96%" alt="lego graphic">
|
||||
</div>
|
||||
|
@ -445,7 +457,7 @@ archivebox add 'https://example.com#2020-10-25'
|
|||
<img src="https://i.imgur.com/p6wK6KM.png" alt="archivebox server list">
|
||||
</td>
|
||||
<td>
|
||||
<img src="https://i.imgur.com/pzq4uXq.png" alt="archivebox server detail">
|
||||
<img src="https://i.imgur.com/xHvQfon.png" alt="archivebox server detail">
|
||||
</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
|
|
|
@ -1079,6 +1079,6 @@ def setup_django(out_dir: Path=None, check_db=False, config: ConfigDict=CONFIG,
|
|||
if check_db:
|
||||
sql_index_path = Path(output_dir) / SQL_INDEX_FILENAME
|
||||
assert sql_index_path.exists(), (
|
||||
f'No database file {SQL_INDEX_FILENAME} found in OUTPUT_DIR: {config["OUTPUT_DIR"]}')
|
||||
f'No database file {SQL_INDEX_FILENAME} found in: {config["OUTPUT_DIR"]} (Are you in an ArchiveBox collection directory?)')
|
||||
except KeyboardInterrupt:
|
||||
raise SystemExit(2)
|
||||
|
|
|
@ -36,8 +36,25 @@ def forwards_func(apps, schema_editor):
|
|||
|
||||
for extractor in history:
|
||||
for result in history[extractor]:
|
||||
ArchiveResult.objects.create(extractor=extractor, snapshot=snapshot, cmd=result["cmd"], cmd_version=result["cmd_version"] or 'unknown',
|
||||
start_ts=result["start_ts"], end_ts=result["end_ts"], status=result["status"], pwd=result["pwd"], output=result["output"])
|
||||
try:
|
||||
ArchiveResult.objects.create(
|
||||
extractor=extractor,
|
||||
snapshot=snapshot,
|
||||
pwd=result["pwd"],
|
||||
cmd=result.get("cmd") or [],
|
||||
cmd_version=result.get("cmd_version") or 'unknown',
|
||||
start_ts=result["start_ts"],
|
||||
end_ts=result["end_ts"],
|
||||
status=result["status"],
|
||||
output=result.get("output") or 'null',
|
||||
)
|
||||
except Exception as e:
|
||||
print(
|
||||
' ! Skipping import due to missing/invalid index.json:',
|
||||
out_dir,
|
||||
e,
|
||||
'(open an issue with this index.json for help)',
|
||||
)
|
||||
|
||||
|
||||
def verify_json_index_integrity(snapshot):
|
||||
|
|
|
@ -55,11 +55,11 @@ class ArchiveResult:
|
|||
assert isinstance(self.end_ts, datetime)
|
||||
assert isinstance(self.cmd, list)
|
||||
assert all(isinstance(arg, str) and arg for arg in self.cmd)
|
||||
assert self.pwd is None or isinstance(self.pwd, str) and self.pwd
|
||||
assert self.cmd_version is None or isinstance(self.cmd_version, str) and self.cmd_version
|
||||
|
||||
# TODO: replace emptystrings in these three with None / remove them from the DB
|
||||
assert self.pwd is None or isinstance(self.pwd, str)
|
||||
assert self.cmd_version is None or isinstance(self.cmd_version, str)
|
||||
assert self.output is None or isinstance(self.output, (str, Exception))
|
||||
if isinstance(self.output, str):
|
||||
assert self.output
|
||||
|
||||
@classmethod
|
||||
def guess_ts(_cls, dict_info):
|
||||
|
|
|
@ -10,14 +10,6 @@ set -o nounset
|
|||
set -o pipefail
|
||||
IFS=$'\n'
|
||||
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
|
||||
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
else
|
||||
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
|
||||
fi
|
||||
cd "$REPO_DIR"
|
||||
|
||||
CURRENT_PLAFORM="$(uname)"
|
||||
REQUIRED_PLATFORM="Linux"
|
||||
|
@ -26,30 +18,27 @@ if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
|
|||
exit 0
|
||||
fi
|
||||
|
||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||
DEBIAN_VERSION="1"
|
||||
PGP_KEY_ID="7D5695D3B618872647861D51C38137A7C1675988"
|
||||
# make sure you have this in ~/.dput.cf:
|
||||
# [archivebox-ppa]
|
||||
# fqdn: ppa.launchpad.net
|
||||
# method: ftp
|
||||
# incoming: ~archivebox/ubuntu/archivebox/
|
||||
# login: anonymous
|
||||
# allow_unsigned_uploads: 0
|
||||
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||
DEBIAN_VERSION="${DEBIAN_VERSION:-1}"
|
||||
cd "$REPO_DIR"
|
||||
|
||||
|
||||
if [[ -f "$REPO_DIR/.venv/bin/activate" ]]; then
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
else
|
||||
echo "[!] Warning: No virtualenv presesnt in $REPO_DIR.venv"
|
||||
fi
|
||||
|
||||
# cleanup build artifacts
|
||||
rm -Rf build deb_dist dist archivebox-*.tar.gz
|
||||
|
||||
# make sure the stdeb.cfg file is up-to-date with all the dependencies
|
||||
|
||||
# build source and binary packages
|
||||
# make sure the stdeb.cfg file is up-to-date with all the dependencies
|
||||
python3 setup.py --command-packages=stdeb.command \
|
||||
sdist_dsc --debian-version=$DEBIAN_VERSION \
|
||||
bdist_deb
|
||||
|
||||
# sign the build with your PGP key ID
|
||||
debsign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
|
||||
|
||||
# push the build to launchpad ppa
|
||||
# dput archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
|
||||
# should output deb_dist/archivebox_0.5.4-1.{deb,changes,buildinfo,tar.gz}
|
||||
|
|
|
@ -10,11 +10,41 @@ set -o nounset
|
|||
set -o pipefail
|
||||
IFS=$'\n'
|
||||
|
||||
|
||||
CURRENT_PLAFORM="$(uname)"
|
||||
REQUIRED_PLATFORM="Linux"
|
||||
if [[ "$CURRENT_PLAFORM" != "$REQUIRED_PLATFORM" ]]; then
|
||||
echo "[!] Skipping the Debian package build on $CURRENT_PLAFORM (it can only be run on $REQUIRED_PLATFORM)."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||
SHORT_VERSION="$(echo "$VERSION" | perl -pe 's/(\d+)\.(\d+)\.(\d+)/$1.$2/g')"
|
||||
DEBIAN_VERSION="${DEBIAN_VERSION:-1}"
|
||||
cd "$REPO_DIR"
|
||||
|
||||
|
||||
echo "[+] Loading PGP keys from env vars and filesystem..."
|
||||
# https://github.com/ArchiveBox/debian-archivebox/settings/secrets/actions
|
||||
PGP_KEY_ID="${PGP_KEY_ID:-BC2D21B0D84E16C437300B8652423FBED1586F45}"
|
||||
[[ "${PGP_PUBLIC_KEY:-}" ]] && echo "$PGP_PUBLIC_KEY" > /tmp/archivebox_gpg.key.pub
|
||||
[[ "${PGP_PRIVATE_KEY:-}" ]] && echo "$PGP_PRIVATE_KEY" > /tmp/archivebox_gpg.key
|
||||
gpg --import /tmp/archivebox_gpg.key.pub || true
|
||||
gpg --import --allow-secret-key-import /tmp/archivebox_gpg.key || true
|
||||
echo "$PGP_KEY_ID:6:" | gpg --import-ownertrust || true
|
||||
|
||||
echo "[*] Signing build and changelog with PGP..."
|
||||
debsign --re-sign -k "$PGP_KEY_ID" "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
|
||||
|
||||
# make sure you have this in ~/.dput.cf:
|
||||
# [archivebox-ppa]
|
||||
# fqdn: ppa.launchpad.net
|
||||
# method: ftp
|
||||
# incoming: ~archivebox/ubuntu/archivebox/
|
||||
# login: anonymous
|
||||
# allow_unsigned_uploads: 0
|
||||
|
||||
|
||||
echo "[^] Uploading to launchpad.net"
|
||||
dput archivebox "deb_dist/archivebox_${VERSION}-1_source.changes"
|
||||
dput -f archivebox "deb_dist/archivebox_${VERSION}-${DEBIAN_VERSION}_source.changes"
|
||||
|
|
|
@ -19,6 +19,7 @@ cd "$REPO_DIR"
|
|||
echo "[^] Uploading docker image"
|
||||
# docker login --username=nikisweeting
|
||||
# docker login docker.pkg.github.com --username=pirate
|
||||
docker push archivebox/archivebox:$VERSION archivebox/archivebox:$SHORT_VERSION archivebox/archivebox:latest
|
||||
docker push docker.io/nikisweeting/archivebox
|
||||
docker push docker.io/archivebox/archivebox
|
||||
docker push docker.pkg.github.com/archivebox/archivebox/archivebox
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "archivebox",
|
||||
"version": "0.5.4",
|
||||
"version": "0.5.6",
|
||||
"description": "ArchiveBox: The self-hosted internet archive",
|
||||
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
||||
"license": "MIT",
|
||||
|
|
7
setup.py
7
setup.py
|
@ -33,11 +33,10 @@ VERSION = json.loads((PACKAGE_DIR / "package.json").read_text().strip())['versio
|
|||
# print('>', sys.executable, *sys.argv)
|
||||
|
||||
|
||||
class CustomTest(test):
|
||||
class DisabledTestCommand(test):
|
||||
def run(self):
|
||||
# setup.py test is deprecated, disable it here by force so stdeb doesnt run it
|
||||
#super().run()
|
||||
pass
|
||||
print('Use the ./bin/test.sh script to run tests, not setup.py test.')
|
||||
|
||||
|
||||
setuptools.setup(
|
||||
|
@ -129,6 +128,6 @@ setuptools.setup(
|
|||
"Typing :: Typed",
|
||||
],
|
||||
cmdclass={
|
||||
"test": CustomTest,
|
||||
"test": DisabledTestCommand,
|
||||
},
|
||||
)
|
||||
|
|
|
@ -7,3 +7,4 @@ Suite3: focal
|
|||
Build-Depends: dh-python, python3-pip, python3-setuptools, python3-wheel, python3-stdeb
|
||||
Depends3: nodejs, chromium-browser, wget, curl, git, ffmpeg, youtube-dl, python3-atomicwrites, python3-croniter, python3-crontab, python3-dateparser, python3-django, python3-django-extensions, python3-django-jsonfield, python3-mypy-extensions, python3-requests, python3-w3lib, ripgrep
|
||||
XS-Python-Version: >= 3.7
|
||||
Setup-Env-Vars: DEB_BUILD_OPTIONS=nocheck
|
||||
|
|
Loading…
Reference in a new issue