ArchiveBox/archivebox/cli/archivebox_list.py

#!/usr/bin/env python3

# Set explicitly, presumably so the relative imports below (`..main`, `..index`, ...)
# still resolve when this file is executed directly as a script -- TODO confirm.
__package__ = 'archivebox.cli'
# Full command name; passed as `prog=` to the ArgumentParser built in main().
__command__ = 'archivebox list'

import sys
import argparse

from typing import Optional, List, IO

from ..main import list_all
from ..util import docstring
from ..config import OUTPUT_DIR
from ..index import (
    get_indexed_folders,
    get_archived_folders,
    get_unarchived_folders,
    get_present_folders,
    get_valid_folders,
    get_invalid_folders,
    get_duplicate_folders,
    get_orphaned_folders,
    get_corrupted_folders,
    get_unrecognized_folders,
)
from ..logging_util import SmartFormatter, accept_stdin, stderr


@docstring(list_all.__doc__)
def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
    # CLI front-end for `archivebox list`. No inline docstring is written here:
    # list_all.__doc__ is handed to the @docstring decorator above and is also
    # reused as the parser description.
    #
    #   args   argv-style list of options/patterns; None or empty parses as "no arguments"
    #   stdin  stream given to accept_stdin(); the result is forwarded to list_all()
    #          as filter_patterns_str
    #   pwd    data directory to operate on; OUTPUT_DIR is used when this is falsy
    #
    # This function never returns normally. It raises SystemExit(2) when
    # --with-headers is used without an output format, and otherwise raises
    # SystemExit with a truthy code (process status 1) if list_all() returned
    # something falsy, or a falsy code (status 0) if it returned matches.
    # argparse may also exit on its own for invalid arguments or --help.

    # Status names grouped the way they are shown in --help. Each row is
    # (choice name, folder getter whose __doc__ describes it, trailing note).
    status_groups = (
        (
            ('indexed', get_indexed_folders, ' (the default)'),
            ('archived', get_archived_folders, ''),
            ('unarchived', get_unarchived_folders, ''),
        ),
        (
            ('present', get_present_folders, ''),
            ('valid', get_valid_folders, ''),
            ('invalid', get_invalid_folders, ''),
        ),
        (
            ('duplicate', get_duplicate_folders, ''),
            ('orphaned', get_orphaned_folders, ''),
            ('corrupted', get_corrupted_folders, ''),
            ('unrecognized', get_unrecognized_folders, ''),
        ),
    )
    status_choices = tuple(
        name
        for rows in status_groups
        for name, _getter, _note in rows
    )
    # One line per status (name left-padded to a 14-char column), with a blank
    # line between groups.
    status_help = 'List only links or data directories that have the given status\n' + '\n'.join(
        ''.join(f'    {name:<14}{getter.__doc__}{note}\n' for name, getter, note in rows)
        for rows in status_groups
    )

    cli = argparse.ArgumentParser(
        prog=__command__,
        description=list_all.__doc__,
        add_help=True,
        formatter_class=SmartFormatter,
    )

    # Output formats: at most one of --csv / --json / --html may be given.
    output_format = cli.add_mutually_exclusive_group()
    output_format.add_argument(
        '--csv',
        type=str,
        default=None,
        help="Print the output in CSV format with the given columns, e.g.: timestamp,url,extension",
    )
    output_format.add_argument(
        '--json',
        action='store_true',
        help="Print the output in JSON format with all columns included.",
    )
    output_format.add_argument(
        '--html',
        action='store_true',
        help="Print the output in HTML format",
    )

    cli.add_argument(
        '--with-headers',
        action='store_true',
        help='Include the headers in the output document',
    )
    cli.add_argument(
        '--sort',
        type=str,
        default=None,
        help="List the links sorted using the given key, e.g. timestamp or updated.",
    )
    # --before and --after are identical apart from the word used in the help text.
    for relation in ('before', 'after'):
        cli.add_argument(
            f'--{relation}',
            type=float,
            default=None,
            help=f"List only links bookmarked {relation} the given timestamp.",
        )
    cli.add_argument(
        '--status',
        type=str,
        choices=status_choices,
        default='indexed',
        help=status_help,
    )
    cli.add_argument(
        '--filter-type',
        type=str,
        choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),
        default='exact',
        help='Type of pattern matching to use when filtering URLs',
    )
    cli.add_argument(
        'filter_patterns',
        nargs='*',
        type=str,
        default=None,
        help='List only URLs matching these filter patterns.',
    )

    opts = cli.parse_args(args or ())
    # stdin is consumed before any validation, same as the argument parsing above.
    patterns_from_stdin = accept_stdin(stdin)

    # Headers only make sense when one of the structured output formats was chosen.
    format_selected = opts.json or opts.html or opts.csv
    if opts.with_headers and not format_selected:
        stderr(
            '[X] --with-headers can only be used with --json, --html or --csv options.\n',
            color='red',
        )
        raise SystemExit(2)

    matching_folders = list_all(
        filter_patterns_str=patterns_from_stdin,
        filter_patterns=opts.filter_patterns,
        filter_type=opts.filter_type,
        status=opts.status,
        after=opts.after,
        before=opts.before,
        sort=opts.sort,
        csv=opts.csv,
        json=opts.json,
        html=opts.html,
        with_headers=opts.with_headers,
        out_dir=pwd or OUTPUT_DIR,
    )

    # A bool exit code: True (nothing matched) maps to status 1, False to status 0.
    nothing_matched = not matching_folders
    raise SystemExit(nothing_matched)

# Direct invocation: hand the process's own arguments and stdin to main().
if __name__ == '__main__':
    main(stdin=sys.stdin, args=sys.argv[1:])
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`#!/usr/bin/env python3`

			`__package__ = 'archivebox.cli'`
			`__command__ = 'archivebox list'`

			`import sys`
			`import argparse`

move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`from typing import Optional, List, IO`

refactor: Organize code to remove flake8 issues 2020-07-24 17:25:25 +00:00			`from ..main import list_all`
			`from ..util import docstring`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`from ..config import OUTPUT_DIR`
			`from ..index import (`
new methods for detecting valid/invalid data dirs on init 2019-04-24 15:40:10 +00:00			`get_indexed_folders,`
			`get_archived_folders,`
			`get_unarchived_folders,`
			`get_present_folders,`
			`get_valid_folders,`
			`get_invalid_folders,`
			`get_duplicate_folders,`
			`get_orphaned_folders,`
			`get_corrupted_folders,`
			`get_unrecognized_folders,`
			`)`
feat: list command fails when --index is used without --json or --html 2020-08-19 18:14:04 +00:00			`from ..logging_util import SmartFormatter, accept_stdin, stderr`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00
move docstrings to main.py out of cli files 2019-05-01 03:10:48 +00:00
			`@docstring(list_all.__doc__)`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`parser = argparse.ArgumentParser(`
			`prog=__command__,`
move docstrings to main.py out of cli files 2019-05-01 03:10:48 +00:00			`description=list_all.__doc__,`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`add_help=True,`
new methods for detecting valid/invalid data dirs on init 2019-04-24 15:40:10 +00:00			`formatter_class=SmartFormatter,`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`)`
			`group = parser.add_mutually_exclusive_group()`
			`group.add_argument(`
			`'--csv', #'-c',`
			`type=str,`
			`help="Print the output in CSV format with the given columns, e.g.: timestamp,url,extension",`
			`default=None,`
			`)`
			`group.add_argument(`
			`'--json', #'-j',`
			`action='store_true',`
			`help="Print the output in JSON format with all columns included.",`
			`)`
feat: Add html export to list command 2020-08-19 18:02:12 +00:00			`group.add_argument(`
			`'--html',`
			`action='store_true',`
			`help="Print the output in HTML format"`
			`)`
feat: Add flag to list command to support index like output 2020-08-19 17:32:25 +00:00			`parser.add_argument(`
refactor: Replace --index with --with-headers in the list command to make it more explicit. Change it so it affects the csv output too. 2020-09-08 14:17:10 +00:00			`'--with-headers',`
feat: Add flag to list command to support index like output 2020-08-19 17:32:25 +00:00			`action='store_true',`
refactor: Replace --index with --with-headers in the list command to make it more explicit. Change it so it affects the csv output too. 2020-09-08 14:17:10 +00:00			`help='Include the headers in the output document'`
feat: Add flag to list command to support index like output 2020-08-19 17:32:25 +00:00			`)`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`parser.add_argument(`
			`'--sort', #'-s',`
			`type=str,`
working consistent list and remove with filtering 2019-04-11 11:00:26 +00:00			`help="List the links sorted using the given key, e.g. timestamp or updated.",`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`default=None,`
			`)`
			`parser.add_argument(`
			`'--before', #'-b',`
			`type=float,`
new methods for detecting valid/invalid data dirs on init 2019-04-24 15:40:10 +00:00			`help="List only links bookmarked before the given timestamp.",`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`default=None,`
			`)`
			`parser.add_argument(`
			`'--after', #'-a',`
			`type=float,`
new methods for detecting valid/invalid data dirs on init 2019-04-24 15:40:10 +00:00			`help="List only links bookmarked after the given timestamp.",`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`default=None,`
			`)`
new methods for detecting valid/invalid data dirs on init 2019-04-24 15:40:10 +00:00			`parser.add_argument(`
			`'--status',`
			`type=str,`
			`choices=('indexed', 'archived', 'unarchived', 'present', 'valid', 'invalid', 'duplicate', 'orphaned', 'corrupted', 'unrecognized'),`
			`default='indexed',`
			`help=(`
			`'List only links or data directories that have the given status\n'`
			`f' indexed {get_indexed_folders.__doc__} (the default)\n'`
			`f' archived {get_archived_folders.__doc__}\n'`
			`f' unarchived {get_unarchived_folders.__doc__}\n'`
			`'\n'`
			`f' present {get_present_folders.__doc__}\n'`
			`f' valid {get_valid_folders.__doc__}\n'`
			`f' invalid {get_invalid_folders.__doc__}\n'`
			`'\n'`
			`f' duplicate {get_duplicate_folders.__doc__}\n'`
			`f' orphaned {get_orphaned_folders.__doc__}\n'`
			`f' corrupted {get_corrupted_folders.__doc__}\n'`
			`f' unrecognized {get_unrecognized_folders.__doc__}\n'`
			`)`
			`)`
working consistent list and remove with filtering 2019-04-11 11:00:26 +00:00			`parser.add_argument(`
			`'--filter-type',`
			`type=str,`
feat: add search filter-type to list command 2020-11-20 04:39:28 +00:00			`choices=('exact', 'substring', 'domain', 'regex', 'tag', 'search'),`
working consistent list and remove with filtering 2019-04-11 11:00:26 +00:00			`default='exact',`
			`help='Type of pattern matching to use when filtering URLs',`
			`)`
			`parser.add_argument(`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`'filter_patterns',`
working consistent list and remove with filtering 2019-04-11 11:00:26 +00:00			`nargs='*',`
			`type=str,`
			`default=None,`
			`help='List only URLs matching these filter patterns.'`
			`)`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`command = parser.parse_args(args or ())`
			`filter_patterns_str = accept_stdin(stdin)`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00
refactor: Replace --index with --with-headers in the list command to make it more explicit. Change it so it affects the csv output too. 2020-09-08 14:17:10 +00:00			`if command.with_headers and not (command.json or command.html or command.csv):`
feat: list command fails when --index is used without --json or --html 2020-08-19 18:14:04 +00:00			`stderr(`
refactor: Replace --index with --with-headers in the list command to make it more explicit. Change it so it affects the csv output too. 2020-09-08 14:17:10 +00:00			`'[X] --with-headers can only be used with --json, --html or --csv options.\n',`
feat: list command fails when --index is used without --json or --html 2020-08-19 18:14:04 +00:00			`color='red',`
			`)`
			`raise SystemExit(2)`

better return status from list cmd 2019-05-01 03:11:07 +00:00			`matching_folders = list_all(`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`filter_patterns_str=filter_patterns_str,`
			`filter_patterns=command.filter_patterns,`
working consistent list and remove with filtering 2019-04-11 11:00:26 +00:00			`filter_type=command.filter_type,`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`status=command.status,`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`after=command.after,`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`before=command.before,`
			`sort=command.sort,`
			`csv=command.csv,`
			`json=command.json,`
feat: Add html export to list command 2020-08-19 18:02:12 +00:00			`html=command.html,`
refactor: Replace --index with --with-headers in the list command to make it more explicit. Change it so it affects the csv output too. 2020-09-08 14:17:10 +00:00			`with_headers=command.with_headers,`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`out_dir=pwd or OUTPUT_DIR,`
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`)`
better return status from list cmd 2019-05-01 03:11:07 +00:00			`raise SystemExit(not matching_folders)`
new methods for detecting valid/invalid data dirs on init 2019-04-24 15:40:10 +00:00
working argparse based CLI with most commands implemented 2019-04-03 04:27:37 +00:00			`if __name__ == '__main__':`
move everything out of legacy folder 2019-04-27 21:26:24 +00:00			`main(args=sys.argv[1:], stdin=sys.stdin)`