mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-12-01 00:19:13 +00:00
fix iterator missing chunk_size
This commit is contained in:
parent
d18418cc22
commit
726bcabd82
1 changed file with 6 additions and 6 deletions
|
@@ -407,7 +407,7 @@ def snapshot_filter(snapshots: QuerySet, filter_patterns: List[str], filter_type
|
||||||
|
|
||||||
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||||
"""indexed links without checking archive status or data directory validity"""
|
"""indexed links without checking archive status or data directory validity"""
|
||||||
links = (snapshot.as_link() for snapshot in snapshots.iterator())
|
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
|
||||||
return {
|
return {
|
||||||
link.link_dir: link
|
link.link_dir: link
|
||||||
for link in links
|
for link in links
|
||||||
|
@@ -415,7 +415,7 @@ def get_indexed_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
|
||||||
|
|
||||||
def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||||
"""indexed links that are archived with a valid data directory"""
|
"""indexed links that are archived with a valid data directory"""
|
||||||
links = (snapshot.as_link() for snapshot in snapshots.iterator())
|
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
|
||||||
return {
|
return {
|
||||||
link.link_dir: link
|
link.link_dir: link
|
||||||
for link in filter(is_archived, links)
|
for link in filter(is_archived, links)
|
||||||
|
@@ -423,7 +423,7 @@ def get_archived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
|
||||||
|
|
||||||
def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
def get_unarchived_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||||
"""indexed links that are unarchived with no data directory or an empty data directory"""
|
"""indexed links that are unarchived with no data directory or an empty data directory"""
|
||||||
links = (snapshot.as_link() for snapshot in snapshots.iterator())
|
links = (snapshot.as_link() for snapshot in snapshots.iterator(chunk_size=500))
|
||||||
return {
|
return {
|
||||||
link.link_dir: link
|
link.link_dir: link
|
||||||
for link in filter(is_unarchived, links)
|
for link in filter(is_unarchived, links)
|
||||||
|
@@ -448,7 +448,7 @@ def get_present_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Option
|
||||||
|
|
||||||
def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
def get_valid_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||||
"""dirs with a valid index matched to the main index and archived content"""
|
"""dirs with a valid index matched to the main index and archived content"""
|
||||||
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator()]
|
links = [snapshot.as_link_with_details() for snapshot in snapshots.iterator(chunk_size=500)]
|
||||||
return {
|
return {
|
||||||
link.link_dir: link
|
link.link_dir: link
|
||||||
for link in filter(is_valid, links)
|
for link in filter(is_valid, links)
|
||||||
|
@@ -475,7 +475,7 @@ def get_duplicate_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Opti
|
||||||
if entry.is_dir() and not snapshots.filter(timestamp=entry.name).exists()
|
if entry.is_dir() and not snapshots.filter(timestamp=entry.name).exists()
|
||||||
)
|
)
|
||||||
|
|
||||||
for path in chain(snapshots.iterator(), data_folders):
|
for path in chain(snapshots.iterator(chunk_size=500), data_folders):
|
||||||
link = None
|
link = None
|
||||||
if type(path) is not str:
|
if type(path) is not str:
|
||||||
path = path.as_link().link_dir
|
path = path.as_link().link_dir
|
||||||
|
@@ -518,7 +518,7 @@ def get_orphaned_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optio
|
||||||
def get_corrupted_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
def get_corrupted_folders(snapshots, out_dir: Path=OUTPUT_DIR) -> Dict[str, Optional[Link]]:
|
||||||
"""dirs that don't contain a valid index and aren't listed in the main index"""
|
"""dirs that don't contain a valid index and aren't listed in the main index"""
|
||||||
corrupted = {}
|
corrupted = {}
|
||||||
for snapshot in snapshots.iterator():
|
for snapshot in snapshots.iterator(chunk_size=500):
|
||||||
link = snapshot.as_link()
|
link = snapshot.as_link()
|
||||||
if is_corrupt(link):
|
if is_corrupt(link):
|
||||||
corrupted[link.link_dir] = link
|
corrupted[link.link_dir] = link
|
||||||
|
|
Loading…
Reference in a new issue