mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-10 06:34:16 +00:00
Merge branch 'master' into node_config
This commit is contained in:
commit
75deaf293d
25 changed files with 224 additions and 154 deletions
19
.npmignore
Normal file
19
.npmignore
Normal file
|
@ -0,0 +1,19 @@
|
|||
tests/
|
||||
archivebox/
|
||||
archivebox.egg-info/
|
||||
build/
|
||||
dist/
|
||||
docs/
|
||||
etc/
|
||||
.github
|
||||
.gitmodules
|
||||
.dockerignore
|
||||
.flake8
|
||||
CNAME
|
||||
_config.yml
|
||||
docker-compose.yaml
|
||||
docker-compose.yml
|
||||
Dockerfile
|
||||
MANIFEST.in
|
||||
Pipfile
|
||||
setup.py
|
|
@ -1,4 +1,4 @@
|
|||
include LICENSE
|
||||
include README.md
|
||||
include archivebox/VERSION
|
||||
recursive-include archivebox/themes *
|
||||
graft archivebox
|
||||
global-exclude .DS_Store
|
||||
global-exclude __pycache__
|
||||
global-exclude *.pyc
|
||||
|
|
|
@ -63,10 +63,10 @@ To get started, you can [install them manually](https://github.com/pirate/Archiv
|
|||
```bash
|
||||
# Docker
|
||||
mkdir data && cd data
|
||||
docker run -v $PWD:/data nikisweeting/archivebox init
|
||||
docker run -v $PWD:/data nikisweeting/archivebox add 'https://example.com'
|
||||
docker run -v $PWD:/data -it nikisweeting/archivebox init
|
||||
docker run -v $PWD:/data -it nikisweeting/archivebox add 'https://example.com'
|
||||
docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser
|
||||
docker run -v $PWD:/data -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
|
||||
docker run -v $PWD:/data -it -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
Metadata-Version: 2.1
|
||||
Name: archivebox
|
||||
Version: 0.4.17
|
||||
Version: 0.4.20
|
||||
Summary: The self-hosted internet archive.
|
||||
Home-page: https://github.com/pirate/ArchiveBox
|
||||
Author: Nick Sweeting
|
||||
|
@ -77,10 +77,10 @@ Description: <div align="center">
|
|||
```bash
|
||||
# Docker
|
||||
mkdir data && cd data
|
||||
docker run -v $PWD:/data nikisweeting/archivebox init
|
||||
docker run -v $PWD:/data nikisweeting/archivebox add 'https://example.com'
|
||||
docker run -v $PWD:/data -it nikisweeting/archivebox init
|
||||
docker run -v $PWD:/data -it nikisweeting/archivebox add 'https://example.com'
|
||||
docker run -v $PWD:/data -it nikisweeting/archivebox manage createsuperuser
|
||||
docker run -v $PWD:/data -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
|
||||
docker run -v $PWD:/data -it -p 8000:8000 nikisweeting/archivebox server 0.0.0.0:8000
|
||||
open http://127.0.0.1:8000
|
||||
```
|
||||
|
||||
|
|
|
@ -1,13 +1,16 @@
|
|||
LICENSE
|
||||
MANIFEST.in
|
||||
README.md
|
||||
setup.py
|
||||
archivebox/VERSION
|
||||
archivebox/.flake8
|
||||
archivebox/LICENSE
|
||||
archivebox/README.md
|
||||
archivebox/__init__.py
|
||||
archivebox/__main__.py
|
||||
archivebox/logging_util.py
|
||||
archivebox/main.py
|
||||
archivebox/manage.py
|
||||
archivebox/mypy.ini
|
||||
archivebox/package.json
|
||||
archivebox/system.py
|
||||
archivebox/util.py
|
||||
archivebox.egg-info/PKG-INFO
|
||||
|
@ -45,6 +48,7 @@ archivebox/core/urls.py
|
|||
archivebox/core/views.py
|
||||
archivebox/core/welcome_message.py
|
||||
archivebox/core/wsgi.py
|
||||
archivebox/core/management/commands/archivebox.py
|
||||
archivebox/core/migrations/0001_initial.py
|
||||
archivebox/core/migrations/0002_auto_20200625_1521.py
|
||||
archivebox/core/migrations/0003_auto_20200630_1034.py
|
||||
|
@ -110,16 +114,4 @@ archivebox/themes/legacy/static/jquery.min.js
|
|||
archivebox/themes/legacy/static/sort_asc.png
|
||||
archivebox/themes/legacy/static/sort_both.png
|
||||
archivebox/themes/legacy/static/sort_desc.png
|
||||
archivebox/themes/legacy/static/spinner.gif
|
||||
tests/__init__.py
|
||||
tests/conftest.py
|
||||
tests/fixtures.py
|
||||
tests/test_args.py
|
||||
tests/test_extractors.py
|
||||
tests/test_init.py
|
||||
tests/test_oneshot.py
|
||||
tests/test_remove.py
|
||||
tests/test_title.py
|
||||
tests/test_util.py
|
||||
tests/mock_server/__init__.py
|
||||
tests/mock_server/server.py
|
||||
archivebox/themes/legacy/static/spinner.gif
|
|
@ -1,2 +1 @@
|
|||
archivebox
|
||||
tests
|
||||
|
|
1
archivebox/LICENSE
Symbolic link
1
archivebox/LICENSE
Symbolic link
|
@ -0,0 +1 @@
|
|||
../LICENSE
|
1
archivebox/README.md
Symbolic link
1
archivebox/README.md
Symbolic link
|
@ -0,0 +1 @@
|
|||
../README.md
|
|
@ -1 +0,0 @@
|
|||
0.4.17
|
|
@ -104,11 +104,11 @@ def main(args: Optional[List[str]]=NotProvided, stdin: Optional[IO]=NotProvided,
|
|||
)
|
||||
command = parser.parse_args(args or ())
|
||||
|
||||
if command.help or command.subcommand is None:
|
||||
command.subcommand = 'help'
|
||||
elif command.version:
|
||||
if command.version:
|
||||
command.subcommand = 'version'
|
||||
|
||||
elif command.help or command.subcommand is None:
|
||||
command.subcommand = 'help'
|
||||
|
||||
if command.subcommand not in ('help', 'version', 'status'):
|
||||
from ..logging_util import log_cli_command
|
||||
|
||||
|
|
|
@ -4,10 +4,11 @@ import os
|
|||
import io
|
||||
import re
|
||||
import sys
|
||||
import django
|
||||
import json
|
||||
import getpass
|
||||
import shutil
|
||||
import platform
|
||||
import django
|
||||
|
||||
from hashlib import md5
|
||||
from pathlib import Path
|
||||
|
@ -58,7 +59,7 @@ CONFIG_DEFAULTS: Dict[str, ConfigDefaultDict] = {
|
|||
'MEDIA_TIMEOUT': {'type': int, 'default': 3600},
|
||||
'OUTPUT_PERMISSIONS': {'type': str, 'default': '755'},
|
||||
'RESTRICT_FILE_NAMES': {'type': str, 'default': 'windows'},
|
||||
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2)(\?.*)?$'}, # to avoid downloading code assets as their own pages
|
||||
'URL_BLACKLIST': {'type': str, 'default': r'\.(css|js|otf|ttf|woff|woff2|gstatic\.com|googleapis\.com/css)(\?.*)?$'}, # to avoid downloading code assets as their own pages
|
||||
},
|
||||
|
||||
'SERVER_CONFIG': {
|
||||
|
@ -186,7 +187,6 @@ STATICFILE_EXTENSIONS = {
|
|||
# html, htm, shtml, xhtml, xml, aspx, php, cgi
|
||||
}
|
||||
|
||||
VERSION_FILENAME = 'VERSION'
|
||||
PYTHON_DIR_NAME = 'archivebox'
|
||||
TEMPLATES_DIR_NAME = 'themes'
|
||||
|
||||
|
@ -232,10 +232,10 @@ DERIVED_CONFIG_DEFAULTS: ConfigDefaultDict = {
|
|||
'CONFIG_FILE': {'default': lambda c: os.path.abspath(os.path.expanduser(c['CONFIG_FILE'])) if c['CONFIG_FILE'] else os.path.join(c['OUTPUT_DIR'], CONFIG_FILENAME)},
|
||||
'COOKIES_FILE': {'default': lambda c: c['COOKIES_FILE'] and os.path.abspath(os.path.expanduser(c['COOKIES_FILE']))},
|
||||
'CHROME_USER_DATA_DIR': {'default': lambda c: find_chrome_data_dir() if c['CHROME_USER_DATA_DIR'] is None else (os.path.abspath(os.path.expanduser(c['CHROME_USER_DATA_DIR'])) or None)},
|
||||
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'], re.IGNORECASE | re.UNICODE | re.MULTILINE)},
|
||||
'URL_BLACKLIST_PTN': {'default': lambda c: c['URL_BLACKLIST'] and re.compile(c['URL_BLACKLIST'] or '', re.IGNORECASE | re.UNICODE | re.MULTILINE)},
|
||||
|
||||
'ARCHIVEBOX_BINARY': {'default': lambda c: sys.argv[0]},
|
||||
'VERSION': {'default': lambda c: open(os.path.join(c['PYTHON_DIR'], VERSION_FILENAME), 'r').read().strip()},
|
||||
'VERSION': {'default': lambda c: json.loads((Path(c['PYTHON_DIR']) / 'package.json').read_text().strip())['version']},
|
||||
'GIT_SHA': {'default': lambda c: c['VERSION'].split('+')[-1] or 'unknown'},
|
||||
|
||||
'PYTHON_BINARY': {'default': lambda c: sys.executable},
|
||||
|
@ -510,16 +510,9 @@ def bin_version(binary: Optional[str]) -> Optional[str]:
|
|||
return None
|
||||
|
||||
try:
|
||||
if binary.split('/')[-1] in ('single-file',):
|
||||
# these dependencies dont support the --version flag, but are valid still
|
||||
if run([abspath, "--help"], stdout=PIPE).returncode == 0:
|
||||
return '0.0.0'
|
||||
else:
|
||||
return None
|
||||
else:
|
||||
version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
|
||||
# take first 3 columns of first line of version info
|
||||
return ' '.join(version_str.split('\n')[0].strip().split()[:3])
|
||||
version_str = run([abspath, "--version"], stdout=PIPE).stdout.strip().decode()
|
||||
# take first 3 columns of first line of version info
|
||||
return ' '.join(version_str.split('\n')[0].strip().split()[:3])
|
||||
except OSError:
|
||||
pass
|
||||
# stderr(f'[X] Unable to find working version of dependency: {binary}', color='red')
|
||||
|
@ -534,6 +527,10 @@ def bin_path(binary: Optional[str]) -> Optional[str]:
|
|||
if binary is None:
|
||||
return None
|
||||
|
||||
node_modules_bin = Path('.') / 'node_modules' / '.bin' / binary
|
||||
if node_modules_bin.exists():
|
||||
return str(node_modules_bin.resolve())
|
||||
|
||||
return shutil.which(os.path.expanduser(binary)) or binary
|
||||
|
||||
def bin_hash(binary: Optional[str]) -> Optional[str]:
|
||||
|
@ -784,6 +781,10 @@ globals().update(CONFIG)
|
|||
# Timezone set as UTC
|
||||
os.environ["TZ"] = 'UTC'
|
||||
|
||||
# add ./node_modules/.bin to $PATH so we can use node scripts in extractors
|
||||
NODE_BIN_PATH = str((Path(CONFIG["OUTPUT_DIR"]).absolute() / 'node_modules' / '.bin'))
|
||||
sys.path.append(NODE_BIN_PATH)
|
||||
|
||||
|
||||
############################## Importable Checkers #############################
|
||||
|
||||
|
@ -825,16 +826,6 @@ def check_system_config(config: ConfigDict=CONFIG) -> None:
|
|||
stderr(' CHROME_USER_DATA_DIR="{}"'.format(config['CHROME_USER_DATA_DIR'].split('/Default')[0]))
|
||||
raise SystemExit(2)
|
||||
|
||||
def print_dependency_additional_info(dependency: str) -> None:
|
||||
if dependency == "SINGLEFILE_BINARY":
|
||||
hint(('npm install -g git+https://github.com/gildas-lormeau/SingleFile.git"',
|
||||
'or set SAVE_SINGLEFILE=False to silence this warning',
|
||||
''))
|
||||
if dependency == "READABILITY_BINARY":
|
||||
hint(('npm install -g git+https://github.com/pirate/readability-extractor.git"',
|
||||
'or set SAVE_READABILITY=False to silence this warning',
|
||||
''))
|
||||
|
||||
|
||||
def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
||||
invalid_dependencies = [
|
||||
|
@ -851,9 +842,10 @@ def check_dependencies(config: ConfigDict=CONFIG, show_help: bool=True) -> None:
|
|||
info['version'] or 'unable to detect version',
|
||||
)
|
||||
)
|
||||
print_dependency_additional_info(dependency)
|
||||
stderr(' {lightred}Hint:{reset} To get more info on dependencies run:'.format(**ANSI))
|
||||
stderr(' archivebox --version')
|
||||
if dependency in ('SINGLEFILE_BINARY', 'READABILITY_BINARY'):
|
||||
hint(('npm install --prefix . "git+https://github.com/pirate/ArchiveBox.git"',
|
||||
f'or archivebox config --set SAVE_{dependency.rsplit("_", 1)[0]}=False to silence this warning',
|
||||
''), prefix=' ')
|
||||
stderr('')
|
||||
|
||||
if config['TIMEOUT'] < 5:
|
||||
|
|
|
@ -31,15 +31,16 @@ class ConfigDict(BaseConfig, total=False):
|
|||
SHOW_PROGRESS: bool
|
||||
IN_DOCKER: bool
|
||||
|
||||
OUTPUT_DIR: str
|
||||
CONFIG_FILE: str
|
||||
OUTPUT_DIR: Optional[str]
|
||||
CONFIG_FILE: Optional[str]
|
||||
ONLY_NEW: bool
|
||||
TIMEOUT: int
|
||||
MEDIA_TIMEOUT: int
|
||||
OUTPUT_PERMISSIONS: str
|
||||
URL_BLACKLIST: Optional[str]
|
||||
RESTRICT_FILE_NAMES: str
|
||||
URL_BLACKLIST: str
|
||||
|
||||
SECRET_KEY: str
|
||||
SECRET_KEY: Optional[str]
|
||||
BIND_ADDR: str
|
||||
ALLOWED_HOSTS: str
|
||||
DEBUG: bool
|
||||
|
@ -52,10 +53,11 @@ class ConfigDict(BaseConfig, total=False):
|
|||
SAVE_FAVICON: bool
|
||||
SAVE_WGET: bool
|
||||
SAVE_WGET_REQUISITES: bool
|
||||
SAVE_SINGLEFILE: bool
|
||||
SAVE_READABILITY: bool
|
||||
SAVE_PDF: bool
|
||||
SAVE_SCREENSHOT: bool
|
||||
SAVE_DOM: bool
|
||||
SAVE_SINGLEFILE: bool
|
||||
SAVE_WARC: bool
|
||||
SAVE_GIT: bool
|
||||
SAVE_MEDIA: bool
|
||||
|
@ -75,53 +77,18 @@ class ConfigDict(BaseConfig, total=False):
|
|||
|
||||
USE_CURL: bool
|
||||
USE_WGET: bool
|
||||
USE_SINGLEFILE: bool
|
||||
USE_READABILITY: bool
|
||||
USE_GIT: bool
|
||||
USE_CHROME: bool
|
||||
USE_YOUTUBEDL: bool
|
||||
USE_SINGLEFILE: bool
|
||||
|
||||
CURL_BINARY: Optional[str]
|
||||
GIT_BINARY: Optional[str]
|
||||
WGET_BINARY: Optional[str]
|
||||
YOUTUBEDL_BINARY: Optional[str]
|
||||
CURL_BINARY: str
|
||||
GIT_BINARY: str
|
||||
WGET_BINARY: str
|
||||
SINGLEFILE_BINARY: str
|
||||
READABILITY_BINARY: str
|
||||
YOUTUBEDL_BINARY: str
|
||||
CHROME_BINARY: Optional[str]
|
||||
SINGLEFILE_BINARY: Optional[str]
|
||||
|
||||
TERM_WIDTH: Callable[[], int]
|
||||
USER: str
|
||||
ANSI: Dict[str, str]
|
||||
REPO_DIR: str
|
||||
PYTHON_DIR: str
|
||||
TEMPLATES_DIR: str
|
||||
ARCHIVE_DIR: str
|
||||
SOURCES_DIR: str
|
||||
LOGS_DIR: str
|
||||
|
||||
URL_BLACKLIST_PTN: Optional[Pattern]
|
||||
WGET_AUTO_COMPRESSION: bool
|
||||
|
||||
ARCHIVEBOX_BINARY: str
|
||||
VERSION: str
|
||||
GIT_SHA: str
|
||||
|
||||
PYTHON_BINARY: str
|
||||
PYTHON_ENCODING: str
|
||||
PYTHON_VERSION: str
|
||||
|
||||
DJANGO_BINARY: str
|
||||
DJANGO_VERSION: str
|
||||
|
||||
CURL_VERSION: str
|
||||
WGET_VERSION: str
|
||||
YOUTUBEDL_VERSION: str
|
||||
GIT_VERSION: str
|
||||
CHROME_VERSION: str
|
||||
|
||||
DEPENDENCIES: Dict[str, SimpleConfigValueDict]
|
||||
CODE_LOCATIONS: Dict[str, SimpleConfigValueDict]
|
||||
CONFIG_LOCATIONS: Dict[str, SimpleConfigValueDict]
|
||||
DATA_LOCATIONS: Dict[str, SimpleConfigValueDict]
|
||||
CHROME_OPTIONS: Dict[str, SimpleConfigValue]
|
||||
|
||||
|
||||
ConfigDefaultValueGetter = Callable[[ConfigDict], ConfigValue]
|
||||
|
|
|
@ -17,7 +17,7 @@ from ..util import (
|
|||
from ..config import (
|
||||
TIMEOUT,
|
||||
SAVE_READABILITY,
|
||||
READABILITY_BINARY,
|
||||
DEPENDENCIES,
|
||||
READABILITY_VERSION,
|
||||
)
|
||||
from ..logging_util import TimedProgress
|
||||
|
@ -73,7 +73,7 @@ def save_readability(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEO
|
|||
temp_doc.close()
|
||||
|
||||
cmd = [
|
||||
READABILITY_BINARY,
|
||||
DEPENDENCIES['READABILITY_BINARY']['path'],
|
||||
temp_doc.name
|
||||
]
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ from ..util import (
|
|||
from ..config import (
|
||||
TIMEOUT,
|
||||
SAVE_SINGLEFILE,
|
||||
SINGLEFILE_BINARY,
|
||||
DEPENDENCIES,
|
||||
SINGLEFILE_VERSION,
|
||||
CHROME_BINARY,
|
||||
)
|
||||
|
@ -43,7 +43,7 @@ def save_singlefile(link: Link, out_dir: Optional[str]=None, timeout: int=TIMEOU
|
|||
|
||||
# SingleFile CLI Docs: https://github.com/gildas-lormeau/SingleFile/tree/master/cli
|
||||
cmd = [
|
||||
SINGLEFILE_BINARY,
|
||||
DEPENDENCIES['SINGLEFILE_BINARY']['path'],
|
||||
'--browser-executable-path={}'.format(CHROME_BINARY),
|
||||
'--browser-args="{}"'.format(json.dumps(browser_args[1:])),
|
||||
link.url,
|
||||
|
|
|
@ -127,7 +127,7 @@ class Link:
|
|||
|
||||
|
||||
def __str__(self) -> str:
|
||||
return f'[{self.timestamp}] {self.base_url} "{self.title}"'
|
||||
return f'[{self.timestamp}] {self.url} "{self.title}"'
|
||||
|
||||
def __post_init__(self):
|
||||
self.typecheck()
|
||||
|
|
|
@ -99,15 +99,18 @@ class TimedProgress:
|
|||
|
||||
if self.SHOW_PROGRESS:
|
||||
# terminate if we haven't already terminated
|
||||
self.p.terminate()
|
||||
self.p.join()
|
||||
self.p.close()
|
||||
|
||||
# clear whole terminal line
|
||||
try:
|
||||
sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset']))
|
||||
except (IOError, BrokenPipeError):
|
||||
# ignore when the parent proc has stopped listening to our stdout
|
||||
self.p.terminate()
|
||||
self.p.join()
|
||||
self.p.close()
|
||||
|
||||
# clear whole terminal line
|
||||
try:
|
||||
sys.stdout.write('\r{}{}\r'.format((' ' * TERM_WIDTH()), ANSI['reset']))
|
||||
except (IOError, BrokenPipeError):
|
||||
# ignore when the parent proc has stopped listening to our stdout
|
||||
pass
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
|
@ -466,7 +469,10 @@ def printable_folders(folders: Dict[str, Optional["Link"]],
|
|||
from .index.csv import links_to_csv
|
||||
return links_to_csv(folders.values(), cols=csv.split(','), header=True)
|
||||
|
||||
return '\n'.join(f'{folder} {link}' for folder, link in folders.items())
|
||||
return '\n'.join(
|
||||
f'{folder} {link and link.url} "{link and link.title}"'
|
||||
for folder, link in folders.items()
|
||||
)
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -114,6 +114,8 @@ ALLOWED_IN_OUTPUT_DIR = {
|
|||
'venv',
|
||||
'virtualenv',
|
||||
'.virtualenv',
|
||||
'node_modules',
|
||||
'package-lock.json',
|
||||
ARCHIVE_DIR_NAME,
|
||||
SOURCES_DIR_NAME,
|
||||
LOGS_DIR_NAME,
|
||||
|
|
1
archivebox/package.json
Symbolic link
1
archivebox/package.json
Symbolic link
|
@ -0,0 +1 @@
|
|||
../package.json
|
44
bin/build.sh
Executable file
44
bin/build.sh
Executable file
|
@ -0,0 +1,44 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
### Bash Environment Setup
|
||||
# http://redsymbol.net/articles/unofficial-bash-strict-mode/
|
||||
# https://www.gnu.org/software/bash/manual/html_node/The-Set-Builtin.html
|
||||
# set -o xtrace
|
||||
set -o errexit
|
||||
set -o errtrace
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
IFS=$'\n'
|
||||
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
cd "$REPO_DIR"
|
||||
|
||||
# echo "[*] Fetching latest docs version"
|
||||
# cd "$REPO_DIR/docs"
|
||||
# git pull
|
||||
# cd "$REPO_DIR"
|
||||
|
||||
# echo "[+] Building docs"
|
||||
# sphinx-apidoc -o docs archivebox
|
||||
# cd "$REPO_DIR/docs"
|
||||
# make html
|
||||
# cd "$REPO_DIR"
|
||||
|
||||
echo "[*] Cleaning up build dirs"
|
||||
cd "$REPO_DIR"
|
||||
rm -Rf build dist archivebox.egg-info
|
||||
|
||||
echo "[+] Building sdist, bdist_egg, and bdist_wheel"
|
||||
python3 setup.py sdist bdist_egg bdist_wheel
|
||||
|
||||
echo "[+] Building docker image in the background..."
|
||||
docker build . -t archivebox \
|
||||
-t archivebox:latest > /tmp/archivebox_docker_build.log 2>&1 &
|
||||
ps "$!"
|
||||
|
||||
echo "[√] Done. Install the built package by running:"
|
||||
echo " python3 setup.py install"
|
||||
echo " # or"
|
||||
echo " pip3 install ."
|
|
@ -10,29 +10,28 @@ set -o nounset
|
|||
set -o pipefail
|
||||
IFS=$'\n'
|
||||
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
VERSION_FILE="$DIR/archivebox/VERSION"
|
||||
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && cd .. && pwd )"
|
||||
|
||||
function bump_semver {
|
||||
echo "$1" | awk -F. '{$NF = $NF + 1;} 1' | sed 's/ /./g'
|
||||
}
|
||||
|
||||
source "$DIR/.venv/bin/activate"
|
||||
cd "$DIR"
|
||||
source "$REPO_DIR/.venv/bin/activate"
|
||||
cd "$REPO_DIR"
|
||||
|
||||
OLD_VERSION="$(cat "$VERSION_FILE")"
|
||||
OLD_VERSION="$(jq -r '.version' < "$REPO_DIR/package.json")"
|
||||
NEW_VERSION="$(bump_semver "$OLD_VERSION")"
|
||||
|
||||
echo "[*] Fetching latest docs version"
|
||||
cd "$DIR/docs"
|
||||
cd "$REPO_DIR/docs"
|
||||
git pull
|
||||
cd "$DIR"
|
||||
cd "$REPO_DIR"
|
||||
|
||||
echo "[+] Building docs"
|
||||
sphinx-apidoc -o docs archivebox
|
||||
cd "$DIR/docs"
|
||||
cd "$REPO_DIR/docs"
|
||||
make html
|
||||
cd "$DIR"
|
||||
cd "$REPO_DIR"
|
||||
|
||||
if [ -z "$(git status --porcelain)" ] && [[ "$(git branch --show-current)" == "master" ]]; then
|
||||
git pull
|
||||
|
@ -43,19 +42,21 @@ else
|
|||
fi
|
||||
|
||||
echo "[*] Bumping VERSION from $OLD_VERSION to $NEW_VERSION"
|
||||
echo "$NEW_VERSION" > "$VERSION_FILE"
|
||||
git add "$DIR/docs"
|
||||
git add "$VERSION_FILE"
|
||||
contents="$(jq ".version = \"$NEW_VERSION\"" "$REPO_DIR/package.json")" && \
|
||||
echo "${contents}" > package.json
|
||||
git add "$REPO_DIR/docs"
|
||||
git add "$REPO_DIR/package.json"
|
||||
git add "$REPO_DIR/package-lock.json"
|
||||
|
||||
echo "[*] Cleaning up build dirs"
|
||||
cd "$DIR"
|
||||
rm -Rf build dist
|
||||
cd "$REPO_DIR"
|
||||
rm -Rf build dist archivebox.egg-info
|
||||
|
||||
echo "[+] Building sdist and bdist_wheel"
|
||||
python3 setup.py sdist bdist_wheel
|
||||
python3 setup.py sdist bdist_egg bdist_wheel
|
||||
|
||||
echo "[^] Pushing source to github"
|
||||
git add "$DIR/archivebox.egg-info"
|
||||
git add "$REPO_DIR/archivebox.egg-info"
|
||||
git commit -m "$NEW_VERSION release"
|
||||
git tag -a "v$NEW_VERSION" -m "v$NEW_VERSION"
|
||||
git push origin master
|
||||
|
|
|
@ -31,7 +31,7 @@ services:
|
|||
# host machine, add tasks and see more info with archivebox schedule --help
|
||||
# scheduler:
|
||||
# image: nikisweeting/archivebox:latest
|
||||
# command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNA<E/feed/all'
|
||||
# command: schedule --foreground --every=day --depth=1 'https://getpocket.com/users/USERNAME/feed/all'
|
||||
# environment:
|
||||
# - USE_COLOR=True
|
||||
# - SHOW_PROGRESS=False
|
||||
|
|
2
docs
2
docs
|
@ -1 +1 @@
|
|||
Subproject commit 101aec0bc1e98c1f7b1a42281a686a098ef9cdde
|
||||
Subproject commit 4a7052eb5000f179ece678c0e98eea3cb581c079
|
6
package-lock.json
generated
6
package-lock.json
generated
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "archivebox",
|
||||
"version": "0.4.14",
|
||||
"version": "0.4.19",
|
||||
"lockfileVersion": 1,
|
||||
"requires": true,
|
||||
"dependencies": {
|
||||
|
@ -902,7 +902,7 @@
|
|||
"integrity": "sha512-N5ZAX4/LxJmF+7wN74pUD6qAh9/wnvdQcjq9TZjevvXzSUo7bfmw91saqMjzGS2xq91/odN2dW/WOl7qQHNDGA=="
|
||||
},
|
||||
"readability-extractor": {
|
||||
"version": "git+https://github.com/pirate/readability-extractor.git#afa6a5bb8473f629ee3f1e0dcbf093b73d4eff40",
|
||||
"version": "git+https://github.com/pirate/readability-extractor.git#0098f142b0a015c8c90766d3b74d9eb6fb7b7e6a",
|
||||
"from": "git+https://github.com/pirate/readability-extractor.git",
|
||||
"requires": {
|
||||
"@mozilla/readability": "^0.3.0",
|
||||
|
@ -1054,7 +1054,7 @@
|
|||
"integrity": "sha1-SysbJ+uAip+NzEgaWOXlb1mfP2E="
|
||||
},
|
||||
"single-file": {
|
||||
"version": "git+https://github.com/gildas-lormeau/SingleFile.git#27c1ba673979f593b3c2c6cd353634bf869743f9",
|
||||
"version": "git+https://github.com/gildas-lormeau/SingleFile.git#e2e15381a6cbb9c3a6ca0ea8ff7307174e98ad12",
|
||||
"from": "git+https://github.com/gildas-lormeau/SingleFile.git",
|
||||
"requires": {
|
||||
"file-url": "^3.0.0",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "archivebox",
|
||||
"version": "0.4.14",
|
||||
"version": "0.4.20",
|
||||
"description": "ArchiveBox: The self-hosted internet archive",
|
||||
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
||||
"license": "MIT",
|
||||
|
@ -8,7 +8,9 @@
|
|||
"archivebox": "./bin/archive"
|
||||
},
|
||||
"bin": {
|
||||
"archivebox": "./bin/archive"
|
||||
"archivebox-node": "./bin/archive",
|
||||
"single-file": "./node_modules/.bin/single-file",
|
||||
"readability-extractor": "./node_modules/.bin/readability-extractor"
|
||||
},
|
||||
"dependencies": {
|
||||
"readability-extractor": "git+https://github.com/pirate/readability-extractor.git",
|
||||
|
|
62
setup.py
62
setup.py
|
@ -1,18 +1,57 @@
|
|||
# import sys
|
||||
import json
|
||||
import setuptools
|
||||
|
||||
from pathlib import Path
|
||||
# from subprocess import check_call
|
||||
# from setuptools.command.install import install
|
||||
# from setuptools.command.develop import develop
|
||||
# from setuptools.command.egg_info import egg_info
|
||||
|
||||
|
||||
PKG_NAME = "archivebox"
|
||||
REPO_URL = "https://github.com/pirate/ArchiveBox"
|
||||
BASE_DIR = Path(__file__).parent.resolve()
|
||||
SOURCE_DIR = BASE_DIR / PKG_NAME
|
||||
README = (BASE_DIR / "README.md").read_text()
|
||||
VERSION = (SOURCE_DIR / "VERSION").read_text().strip()
|
||||
REPO_DIR = Path(__file__).parent.resolve()
|
||||
PYTHON_DIR = REPO_DIR / PKG_NAME
|
||||
README = (PYTHON_DIR / "README.md").read_text()
|
||||
VERSION = json.loads((PYTHON_DIR / "package.json").read_text().strip())['version']
|
||||
|
||||
# To see when setup.py gets called (uncomment for debugging):
|
||||
|
||||
# To see when setup.py gets called (uncomment for debugging)
|
||||
# import sys
|
||||
# print(SOURCE_DIR, f" (v{VERSION})")
|
||||
# print(PYTHON_DIR, f" (v{VERSION})")
|
||||
# print('>', sys.executable, *sys.argv)
|
||||
# raise SystemExit(0)
|
||||
|
||||
# Sketchy way to install npm dependencies as a pip post-install script
|
||||
|
||||
# def setup_js():
|
||||
# if sys.platform.lower() not in ('darwin', 'linux'):
|
||||
# sys.stderr.write('[!] Warning: ArchiveBox is not officially supported on this platform.\n')
|
||||
|
||||
# sys.stderr.write(f'[+] Installing ArchiveBox npm package (PYTHON_DIR={PYTHON_DIR})...\n')
|
||||
# try:
|
||||
# check_call(f'npm install -g "{REPO_DIR}"', shell=True)
|
||||
# sys.stderr.write('[√] Automatically installed npm dependencies.\n')
|
||||
# except Exception as err:
|
||||
# sys.stderr.write(f'[!] Failed to auto-install npm dependencies: {err}\n')
|
||||
# sys.stderr.write(' Install NPM/npm using your system package manager, then run:\n')
|
||||
# sys.stderr.write(' npm install -g "git+https://github.com/pirate/ArchiveBox.git"\n')
|
||||
|
||||
|
||||
# class CustomInstallCommand(install):
|
||||
# def run(self):
|
||||
# super().run()
|
||||
# setup_js()
|
||||
|
||||
# class CustomDevelopCommand(develop):
|
||||
# def run(self):
|
||||
# super().run()
|
||||
# setup_js()
|
||||
|
||||
# class CustomEggInfoCommand(egg_info):
|
||||
# def run(self):
|
||||
# super().run()
|
||||
# setup_js()
|
||||
|
||||
setuptools.setup(
|
||||
name=PKG_NAME,
|
||||
|
@ -72,13 +111,18 @@ setuptools.setup(
|
|||
# 'redis': ['redis', 'django-redis'],
|
||||
# 'pywb': ['pywb', 'redis'],
|
||||
},
|
||||
packages=setuptools.find_packages(),
|
||||
packages=['archivebox'],
|
||||
include_package_data=True, # see MANIFEST.in
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
f"{PKG_NAME} = {PKG_NAME}.cli:main",
|
||||
],
|
||||
},
|
||||
include_package_data=True,
|
||||
# cmdclass={
|
||||
# 'install': CustomInstallCommand,
|
||||
# 'develop': CustomDevelopCommand,
|
||||
# 'egg_info': CustomEggInfoCommand,
|
||||
# },
|
||||
classifiers=[
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Natural Language :: English",
|
||||
|
|
Loading…
Reference in a new issue