mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-25 05:30:23 +00:00
238 lines
6.7 KiB
Python
238 lines
6.7 KiB
Python
# Pin this module's package name to `archivebox.api` explicitly.
__package__ = 'archivebox.api'
|
|
|
|
import json
|
|
from typing import List, Dict, Any, Optional
|
|
from enum import Enum
|
|
|
|
from ninja import Router, Schema
|
|
|
|
from archivebox.misc.util import ansi_to_html
|
|
from archivebox.config.common import ARCHIVING_CONFIG
|
|
|
|
|
|
# from .auth import API_AUTH_METHODS
|
|
|
|
# Router for the API that exposes archivebox CLI subcommands as REST endpoints.
router = Router(
    tags=['ArchiveBox CLI Sub-Commands'],
)


# Schemas

# Union of the value shapes allowed in the `result` field of a CLI response.
JSONType = (
    List[Any]
    | Dict[str, Any]
    | bool
    | int
    | str
    | None
)
|
|
|
|
class CLICommandResponseSchema(Schema):
    # Response envelope declared as `response=` on every route in this module.

    # Outcome of the command.
    success: bool
    errors: List[str]

    # Value returned by the CLI function, plus a short label naming its format
    # (cli_search sets it to "json" / "html" / "csv" / "txt").
    result: JSONType
    result_format: str = "str"

    # Text the handlers read from request.stdout / request.stderr and pass
    # through ansi_to_html before returning.
    stdout: str
    stderr: str
|
|
|
|
class FilterTypeChoices(str, Enum):
    # String-valued enum of filter types; each member's value equals its name.
    exact = "exact"
    substring = "substring"
    regex = "regex"
    domain = "domain"
    tag = "tag"
    timestamp = "timestamp"
|
|
|
|
class StatusChoices(str, Enum):
    # String-valued enum of snapshot status names; each member's value equals its name.
    indexed = "indexed"
    archived = "archived"
    unarchived = "unarchived"

    present = "present"
    valid = "valid"
    invalid = "invalid"

    duplicate = "duplicate"
    orphaned = "orphaned"
    corrupted = "corrupted"
    unrecognized = "unrecognized"
|
|
|
|
|
|
class AddCommandSchema(Schema):
    # Request body for POST /add; cli_add forwards every field to `add()`.

    # The only field without a default: the URLs to add.
    urls: List[str]

    tag: str = ''
    depth: int = 0
    parser: str = 'auto'
    extract: str = ''

    # Defaults to the opposite of ARCHIVING_CONFIG.ONLY_NEW.
    update: bool = not ARCHIVING_CONFIG.ONLY_NEW
    overwrite: bool = False
    index_only: bool = False
|
|
|
|
class UpdateCommandSchema(Schema):
    # Request body for POST /update; cli_update forwards every field to `update()`.

    # Run options.
    resume: Optional[float] = 0
    only_new: bool = ARCHIVING_CONFIG.ONLY_NEW
    index_only: bool = False
    overwrite: bool = False

    # Numeric lower / upper bounds (units are not visible in this file).
    after: Optional[float] = 0
    before: Optional[float] = 999_999_999_999_999

    # Selection of what to update.
    status: Optional[StatusChoices] = StatusChoices.unarchived
    filter_type: Optional[str] = FilterTypeChoices.substring
    filter_patterns: Optional[List[str]] = ['https://example.com']
    extractors: Optional[str] = ''
|
|
|
|
class ScheduleCommandSchema(Schema):
    # Request body for POST /schedule; cli_schedule forwards every field to `schedule()`.
    import_path: Optional[str] = None
    add: bool = False
    # cli_schedule reads `args.show`; without this field that access raises
    # AttributeError on every request. Defaults to False so existing clients
    # that never send `show` keep working.
    show: bool = False
    every: Optional[str] = None
    tag: str = ''
    depth: int = 0
    overwrite: bool = False
    # Defaults to the opposite of ARCHIVING_CONFIG.ONLY_NEW.
    update: bool = not ARCHIVING_CONFIG.ONLY_NEW
    clear: bool = False
|
|
|
|
class ListCommandSchema(Schema):
    # Request body for POST /search; cli_search forwards every field to `search()`.

    # Selection of which entries to list.
    filter_patterns: Optional[List[str]] = ['https://example.com']
    filter_type: str = FilterTypeChoices.substring
    status: StatusChoices = StatusChoices.indexed
    after: Optional[float] = 0
    before: Optional[float] = 999_999_999_999_999
    sort: str = "bookmarked_at"

    # Output format switches; cli_search checks them in the order json, html, csv.
    as_json: bool = True
    as_html: bool = False
    as_csv: Optional[str] = "timestamp,url"
    with_headers: bool = False
|
|
|
|
class RemoveCommandSchema(Schema):
    # Request body for POST /remove; cli_remove forwards every field to `remove()`.
    delete: bool = True

    # Numeric lower / upper bounds (units are not visible in this file).
    after: Optional[float] = 0
    before: Optional[float] = 999_999_999_999_999

    # Selection of what to remove.
    filter_type: str = FilterTypeChoices.exact
    filter_patterns: Optional[List[str]] = ["https://example.com"]
|
|
|
|
|
|
|
|
|
|
|
|
@router.post(
    "/add",
    response=CLICommandResponseSchema,
    summary='archivebox add [args] [urls]',
)
def cli_add(request, args: AddCommandSchema):
    # POST /add: call the `add` CLI function with the fields of `args` and wrap
    # its return value in the CLICommandResponseSchema shape.
    # Assumes request.stdout / request.stderr are StringIO-like buffers attached
    # elsewhere (not visible in this file).
    from archivebox.cli.archivebox_add import add

    add_kwargs = {
        "urls": args.urls,
        "tag": args.tag,
        "depth": args.depth,
        "update": args.update,
        "index_only": args.index_only,
        "overwrite": args.overwrite,
        "extract": args.extract,
        "parser": args.parser,
    }
    outcome = add(**add_kwargs)

    # Read the buffers only after the command has finished running.
    out_text = request.stdout.getvalue().strip()
    err_text = request.stderr.getvalue().strip()

    return {
        "success": True,
        "errors": [],
        "result": outcome,
        "stdout": ansi_to_html(out_text),
        "stderr": ansi_to_html(err_text),
    }
|
|
|
|
|
|
@router.post(
    "/update",
    response=CLICommandResponseSchema,
    summary='archivebox update [args] [filter_patterns]',
)
def cli_update(request, args: UpdateCommandSchema):
    # POST /update: call the `update` CLI function with the fields of `args` and
    # wrap its return value in the CLICommandResponseSchema shape.
    # Assumes request.stdout / request.stderr are StringIO-like buffers attached
    # elsewhere (not visible in this file).
    from archivebox.cli.archivebox_update import update

    update_kwargs = {
        "resume": args.resume,
        "only_new": args.only_new,
        "index_only": args.index_only,
        "overwrite": args.overwrite,
        "before": args.before,
        "after": args.after,
        "status": args.status,
        "filter_type": args.filter_type,
        "filter_patterns": args.filter_patterns,
        "extractors": args.extractors,
    }
    outcome = update(**update_kwargs)

    # Read the buffers only after the command has finished running.
    out_text = request.stdout.getvalue().strip()
    err_text = request.stderr.getvalue().strip()

    return {
        "success": True,
        "errors": [],
        "result": outcome,
        "stdout": ansi_to_html(out_text),
        "stderr": ansi_to_html(err_text),
    }
|
|
|
|
|
|
@router.post("/schedule", response=CLICommandResponseSchema, summary='archivebox schedule [args] [import_path]')
def cli_schedule(request, args: ScheduleCommandSchema):
    # POST /schedule: call the `schedule` CLI function with the fields of `args`.
    #
    # request: the incoming request; assumed to carry StringIO-like `stdout` and
    #          `stderr` buffers attached elsewhere (not visible in this file).
    # args:    the parsed request body.
    # returns: a dict in the CLICommandResponseSchema shape, with the return value
    #          of `schedule()` under "result".
    from archivebox.cli.archivebox_schedule import schedule

    result = schedule(
        import_path=args.import_path,
        add=args.add,
        # The request schema may not declare a `show` field, in which case a
        # plain `args.show` raises AttributeError on every call. Fall back to
        # False when the field is absent.
        show=getattr(args, 'show', False),
        clear=args.clear,
        every=args.every,
        tag=args.tag,
        depth=args.depth,
        overwrite=args.overwrite,
        update=args.update,
    )

    return {
        "success": True,
        "errors": [],
        "result": result,
        "stdout": ansi_to_html(request.stdout.getvalue().strip()),
        "stderr": ansi_to_html(request.stderr.getvalue().strip()),
    }
|
|
|
|
|
|
|
|
@router.post(
    "/search",
    response=CLICommandResponseSchema,
    summary='archivebox search [args] [filter_patterns]',
)
def cli_search(request, args: ListCommandSchema):
    # POST /search: call the `search` CLI function with the fields of `args`,
    # then label the result with the output format that was requested.
    # Assumes request.stdout / request.stderr are StringIO-like buffers attached
    # elsewhere (not visible in this file).
    from archivebox.cli.archivebox_search import search

    search_kwargs = {
        "filter_patterns": args.filter_patterns,
        "filter_type": args.filter_type,
        "status": args.status,
        "after": args.after,
        "before": args.before,
        "sort": args.sort,
        "csv": args.as_csv,
        "json": args.as_json,
        "html": args.as_html,
        "with_headers": args.with_headers,
    }
    output = search(**search_kwargs)

    # Precedence when several format switches are set: json, then html, then csv;
    # 'txt' when none is set.
    output_format = (
        "json" if args.as_json
        else "html" if args.as_html
        else "csv" if args.as_csv
        else 'txt'
    )
    if output_format == "json":
        # Decode the JSON text so the response carries structured data.
        output = json.loads(output)

    out_text = request.stdout.getvalue().strip()
    err_text = request.stderr.getvalue().strip()

    return {
        "success": True,
        "errors": [],
        "result": output,
        "result_format": output_format,
        "stdout": ansi_to_html(out_text),
        "stderr": ansi_to_html(err_text),
    }
|
|
|
|
|
|
|
|
@router.post(
    "/remove",
    response=CLICommandResponseSchema,
    summary='archivebox remove [args] [filter_patterns]',
)
def cli_remove(request, args: RemoveCommandSchema):
    # POST /remove: call the `remove` CLI function with the fields of `args` and
    # wrap its return value in the CLICommandResponseSchema shape.
    # Assumes request.stdout / request.stderr are StringIO-like buffers attached
    # elsewhere (not visible in this file).
    from archivebox.cli.archivebox_remove import remove

    remove_kwargs = {
        # No way to interactively ask for confirmation via the API, so force yes.
        "yes": True,
        "delete": args.delete,
        "before": args.before,
        "after": args.after,
        "filter_type": args.filter_type,
        "filter_patterns": args.filter_patterns,
    }
    outcome = remove(**remove_kwargs)

    # Read the buffers only after the command has finished running.
    out_text = request.stdout.getvalue().strip()
    err_text = request.stderr.getvalue().strip()

    return {
        "success": True,
        "errors": [],
        "result": outcome,
        "stdout": ansi_to_html(out_text),
        "stderr": ansi_to_html(err_text),
    }
|
|
|