mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2025-02-22 08:18:28 +00:00
add extractors arg to oneshot command and bump version to v0.5.1
This commit is contained in:
parent
a194bb6301
commit
9fa70b3452
4 changed files with 15 additions and 6 deletions
|
@@ -89,8 +89,8 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
index_only=command.index_only,
|
||||
overwrite=command.overwrite,
|
||||
init=command.init,
|
||||
out_dir=pwd or OUTPUT_DIR,
|
||||
extractors=command.extract,
|
||||
out_dir=pwd or OUTPUT_DIR,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@@ -36,6 +36,13 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
' ~/Desktop/sites_list.csv\n'
|
||||
)
|
||||
)
|
||||
parser.add_argument(
|
||||
"--extract",
|
||||
type=str,
|
||||
help="Pass a list of the extractors to be used. If the method name is not correct, it will be ignored. \
|
||||
This does not take precedence over the configuration",
|
||||
default=""
|
||||
)
|
||||
parser.add_argument(
|
||||
'--out-dir',
|
||||
type=str,
|
||||
|
@@ -55,6 +62,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
|
|||
oneshot(
|
||||
url=stdin_url or url,
|
||||
out_dir=Path(command.out_dir).resolve(),
|
||||
extractors=command.extract,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@@ -511,7 +511,7 @@ def status(out_dir: Path=OUTPUT_DIR) -> None:
|
|||
|
||||
|
||||
@enforce_types
|
||||
def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
|
||||
def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
|
||||
"""
|
||||
Create a single URL archive folder with an index.json and index.html, and all the archive method outputs.
|
||||
You can run this to archive single pages without needing to create a whole collection with archivebox init.
|
||||
|
@@ -523,7 +523,8 @@ def oneshot(url: str, out_dir: Path=OUTPUT_DIR):
|
|||
color='red'
|
||||
)
|
||||
raise SystemExit(2)
|
||||
methods = ignore_methods(['title'])
|
||||
|
||||
methods = extractors.split(",") if extractors else ignore_methods(['title'])
|
||||
archive_link(oneshot_link[0], out_dir=out_dir, methods=methods)
|
||||
return oneshot_link
|
||||
|
||||
|
@@ -534,8 +535,8 @@ def add(urls: Union[str, List[str]],
|
|||
index_only: bool=False,
|
||||
overwrite: bool=False,
|
||||
init: bool=False,
|
||||
out_dir: Path=OUTPUT_DIR,
|
||||
extractors: str="") -> List[Link]:
|
||||
extractors: str="",
|
||||
out_dir: Path=OUTPUT_DIR) -> List[Link]:
|
||||
"""Add a new URL or list of URLs to your archive"""
|
||||
|
||||
assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
|
||||
|
|
|
@@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "archivebox",
|
||||
"version": "0.5.0",
|
||||
"version": "0.5.1",
|
||||
"description": "ArchiveBox: The self-hosted internet archive",
|
||||
"author": "Nick Sweeting <archivebox-npm@sweeting.me>",
|
||||
"license": "MIT",
|
||||
|
|
Loading…
Add table
Reference in a new issue