Merge pull request #555 from cdvv7788/cleanup

Commit 04291c4d47 by Nick Sweeting, 2020-12-04 20:40:38 -05:00, committed by GitHub
5 changed files with 42 additions and 65 deletions


@@ -383,7 +383,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
 def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links without checking archive status or data directory validity"""
-    links = [snapshot.as_link() for snapshot in snapshots.iterator()]
+    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
     return {
         link.link_dir: link
         for link in links
@@ -391,7 +391,7 @@ def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
 def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links that are archived with a valid data directory"""
-    links = [snapshot.as_link() for snapshot in snapshots.iterator()]
+    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
     return {
         link.link_dir: link
         for link in filter(is_archived, links)
@@ -399,7 +399,7 @@ def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
 def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """indexed links that are unarchived with no data directory or an empty data directory"""
-    links = [snapshot.as_link() for snapshot in snapshots.iterator()]
+    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
     return {
         link.link_dir: link
         for link in filter(is_unarchived, links)
@@ -424,7 +424,7 @@ def get_present_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
 def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
     """dirs with a valid index matched to the main index and archived content"""
-    links = [snapshot.as_link() for snapshot in snapshots.iterator()]
+    links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
     return {
         link.link_dir: link
         for link in filter(is_valid, links)
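
All four folder-status helpers switch from snapshot.as_link() to snapshot.as_link_with_details(), presumably so the returned Link objects also carry per-snapshot details loaded from disk. A minimal sketch of the shared pattern (folders_by_status and the predicate argument are illustrative names, not part of this codebase):

    from typing import Callable, Dict, Optional

    def folders_by_status(snapshots, predicate: Callable) -> Dict[str, Optional["Link"]]:
        # as_link_with_details() is assumed to also load each snapshot's
        # archive history from its data directory, unlike plain as_link()
        links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
        # map each data directory to its Link (filtered by the status check)
        return {link.link_dir: link for link in filter(predicate, links)}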


@@ -46,6 +46,14 @@ def parse_html_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[str]:
                 yield line.split('"')[1]
     return ()

+@enforce_types
+def generate_index_from_links(links: List[Link], with_headers: bool):
+    if with_headers:
+        output = main_index_template(links)
+    else:
+        output = main_index_template(links, template=MINIMAL_INDEX_TEMPLATE)
+    return output
+
 @enforce_types
 def main_index_template(links: List[Link], template: str=MAIN_INDEX_TEMPLATE) -> str:
     """render the template for the entire main index"""


@@ -8,7 +8,7 @@ from pathlib import Path
 from datetime import datetime
 from typing import List, Optional, Iterator, Any, Union

-from .schema import Link, ArchiveResult
+from .schema import Link
 from ..system import atomic_write
 from ..util import enforce_types
 from ..config import (
@@ -39,7 +39,20 @@ MAIN_INDEX_HEADER = {
     },
 }

-### Main Links Index
+@enforce_types
+def generate_json_index_from_links(links: List[Link], with_headers: bool):
+    if with_headers:
+        output = {
+            **MAIN_INDEX_HEADER,
+            'num_links': len(links),
+            'updated': datetime.now(),
+            'last_run_cmd': sys.argv,
+            'links': links,
+        }
+    else:
+        output = links
+    return to_json(output, indent=4, sort_keys=True)

 @enforce_types
 def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
@@ -65,30 +78,6 @@ def parse_json_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
             continue
     return ()

-@enforce_types
-def write_json_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR) -> None:
-    """write the json link index to a given path"""
-
-    assert isinstance(links, List), 'Links must be a list, not a generator.'
-    assert not links or isinstance(links[0].history, dict)
-    assert not links or isinstance(links[0].sources, list)
-
-    if links and links[0].history.get('title'):
-        assert isinstance(links[0].history['title'][0], ArchiveResult)
-
-    if links and links[0].sources:
-        assert isinstance(links[0].sources[0], str)
-
-    main_index_json = {
-        **MAIN_INDEX_HEADER,
-        'num_links': len(links),
-        'updated': datetime.now(),
-        'last_run_cmd': sys.argv,
-        'links': links,
-    }
-
-    atomic_write(str(Path(out_dir) / JSON_INDEX_FILENAME), main_index_json)

 ### Link Details Index

 @enforce_types
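
generate_json_index_from_links() mirrors the HTML helper: with headers it wraps the links in the MAIN_INDEX_HEADER metadata plus the link count, timestamp, and invoking argv; without headers it serializes the bare list. A minimal usage sketch, assuming to_json()'s encoder handles Link and datetime values the same way it did for the removed write_json_main_index():

    from archivebox.index.json import generate_json_index_from_links

    # full index document with metadata header fields
    print(generate_json_index_from_links(links_list, with_headers=True))

    # just the JSON array of links
    print(generate_json_index_from_links(links_list, with_headers=False))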


@@ -477,39 +477,7 @@ def printable_filesize(num_bytes: Union[int, float]) -> str:
 @enforce_types
 def printable_folders(folders: Dict[str, Optional["Link"]],
-                      json: bool=False,
-                      html: bool=False,
-                      csv: Optional[str]=None,
                       with_headers: bool=False) -> str:
-    from .index.json import MAIN_INDEX_HEADER
-
-    links = folders.values()
-
-    if json:
-        from .index.json import to_json
-        if with_headers:
-            output = {
-                **MAIN_INDEX_HEADER,
-                'num_links': len(links),
-                'updated': datetime.now(),
-                'last_run_cmd': sys.argv,
-                'links': links,
-            }
-        else:
-            output = links
-        return to_json(output, indent=4, sort_keys=True)
-    elif html:
-        from .index.html import main_index_template
-        if with_headers:
-            output = main_index_template(links)
-        else:
-            from .index.html import MINIMAL_INDEX_TEMPLATE
-            output = main_index_template(links, template=MINIMAL_INDEX_TEMPLATE)
-        return output
-    elif csv:
-        from .index.csv import links_to_csv
-        return links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
-
     return '\n'.join(
         f'{folder} {link and link.url} "{link and link.title}"'
         for folder, link in folders.items()
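
printable_folders() is now a pure plain-text formatter; the json/html/csv dispatch moves to its callers. A short sketch of the remaining behavior (the paths and Link fields here are illustrative):

    folders = {
        'archive/1606000000': example_link,  # a Link with .url and .title
        'archive/1606000001': None,          # missing or invalid data directory
    }
    print(printable_folders(folders))
    # archive/1606000000 https://example.com "Example Title"
    # archive/1606000001 None "None"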


@@ -49,12 +49,17 @@ from .index import (
 from .index.json import (
     parse_json_main_index,
     parse_json_links_details,
+    generate_json_index_from_links,
 )
 from .index.sql import (
     get_admins,
     apply_migrations,
     remove_from_sql_main_index,
 )
+from .index.html import (
+    generate_index_from_links,
+)
+from .index.csv import links_to_csv
 from .extractors import archive_links, archive_link, ignore_methods
 from .config import (
     stderr,
@@ -745,7 +750,6 @@ def list_all(filter_patterns_str: Optional[str]=None,
     elif filter_patterns_str:
         filter_patterns = filter_patterns_str.split('\n')

-
     snapshots = list_links(
         filter_patterns=filter_patterns,
         filter_type=filter_type,
@@ -762,7 +766,15 @@ def list_all(filter_patterns_str: Optional[str]=None,
         out_dir=out_dir,
     )

-    print(printable_folders(folders, json=json, csv=csv, html=html, with_headers=with_headers))
+    if json:
+        output = generate_json_index_from_links(folders.values(), with_headers)
+    elif html:
+        output = generate_index_from_links(folders.values(), with_headers)
+    elif csv:
+        output = links_to_csv(folders.values(), cols=csv.split(','), header=with_headers)
+    else:
+        output = printable_folders(folders, with_headers=with_headers)
+    print(output)
     return folders
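
With the format dispatch inlined here, list_all() picks exactly one renderer and prints its output. A hedged example of driving it from Python (keyword names follow the signature shown in the hunk headers above; values are illustrative):

    from archivebox.main import list_all

    # roughly what `archivebox list --csv=timestamp,url --with-headers` does
    folders = list_all(
        filter_patterns_str='https://example.com',
        csv='timestamp,url',
        with_headers=True,
    )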