2020-07-29 16:19:06 +00:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# Pin the package name explicitly; Python uses __package__ to resolve the
# relative imports below (`from ..main import ...`), which matters when this
# file is executed directly through its `__main__` guard.
__package__ = 'archivebox.cli'
|
|
|
|
# Program name passed to argparse as `prog=` in main().
__command__ = 'archivebox oneshot'
|
|
|
|
|
|
|
|
import sys
|
|
|
|
import argparse
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
from typing import List, Optional, IO
|
|
|
|
|
|
|
|
from ..main import oneshot
|
|
|
|
from ..util import docstring
|
|
|
|
from ..config import OUTPUT_DIR
|
|
|
|
from ..logging_util import SmartFormatter, accept_stdin, stderr
|
|
|
|
|
|
|
|
|
|
|
|
@docstring(oneshot.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
    # CLI entry point for `archivebox oneshot`.
    #
    # Parses `args`, determines the single URL/path to archive (from the
    # positional argument, or from `stdin` when that argument is empty) and
    # hands it to oneshot() together with the output directory and the
    # extractor selection.
    #
    # Parameters:
    #   args:  command-line arguments without the program name; None is
    #          treated as an empty argument list.
    #   stdin: stream handed to accept_stdin() when no URL argument is given.
    #   pwd:   accepted for signature compatibility; not used in this function.
    #
    # Raises:
    #   SystemExit(2) when neither the CLI argument nor stdin yields a URL
    #   (argparse itself also exits on malformed arguments).
    #
    # Documentation lives in comments rather than a literal docstring because
    # the `docstring` decorator is applied with oneshot.__doc__ (presumably to
    # set this function's __doc__ -- confirm in ..util).
    parser = argparse.ArgumentParser(
        prog=__command__,
        description=oneshot.__doc__,
        add_help=True,
        formatter_class=SmartFormatter,
    )
    # NOTE(review): without nargs='?' argparse treats this positional as
    # required, so the stdin fallback below is only reached when an empty
    # string is passed explicitly. Left unchanged here; confirm how
    # accept_stdin() behaves on an interactive terminal before relaxing it.
    parser.add_argument(
        'url',
        type=str,
        default=None,
        help=(
            'URLs or paths to archive e.g.:\n'
            ' https://getpocket.com/users/USERNAME/feed/all\n'
            ' https://example.com/some/rss/feed.xml\n'
            ' https://example.com\n'
            ' ~/Downloads/firefox_bookmarks_export.html\n'
            ' ~/Desktop/sites_list.csv\n'
        )
    )
    parser.add_argument(
        "--extract",
        type=str,
        # Implicit concatenation instead of a backslash continuation, so the
        # source indentation never leaks into the help text.
        help=(
            "Pass a list of the extractors to be used. "
            "If the method name is not correct, it will be ignored. "
            "This does not take precedence over the configuration"
        ),
        default=""
    )
    parser.add_argument(
        '--out-dir',
        type=str,
        default=OUTPUT_DIR,
        help= "Path to save the single archive folder to, e.g. ./example.com_archive"
    )
    command = parser.parse_args(args or ())

    url = command.url
    # stdin is only consulted when no URL was given on the command line.
    stdin_url = None if url else accept_stdin(stdin)

    # Validate against what was actually read from stdin, not against the
    # stream object: an open-but-empty stdin must not let an empty URL
    # through to oneshot().
    if not (url or stdin_url):
        stderr(
            '[X] You must pass a URL/path to add via stdin or CLI arguments.\n',
            color='red',
        )
        raise SystemExit(2)

    oneshot(
        url=stdin_url or url,
        out_dir=Path(command.out_dir).resolve(),
        extractors=command.extract,
    )
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: forward the process's stdin and every CLI
    # argument after the program name to main().
    main(stdin=sys.stdin, args=sys.argv[1:])
|