mirror of https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-22 20:23:12 +00:00
better function naming
This commit is contained in:
parent
328a59749b
commit
eb003f6a26
2 changed files with 7 additions and 11 deletions
archive.py

@@ -28,7 +28,7 @@ from config import (
 from util import (
     check_dependencies,
     download_url,
-    save_source,
+    save_stdin_source,
     pretty_path,
     migrate_data,
     check_links_structure,
@@ -204,8 +204,7 @@ if __name__ == '__main__':
     if source and any(source.startswith(s) for s in ('http://', 'https://', 'ftp://')):
         source = download_url(source)
     elif stdin_raw_text:
-        source = save_source(stdin_raw_text)
-
+        source = save_stdin_source(stdin_raw_text)

     # Step 1: Parse the links and dedupe them with existing archive
     all_links, new_links = load_links(archive_path=out_dir, import_path=source)
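For context, the renamed call sites dispatch on where the import came from: a URL argument is fetched and cached locally, while piped-in stdin text is written straight to a sources file. A minimal sketch of that dispatch, assuming the util.py helpers shown below (the resolve_source wrapper name is hypothetical, not part of this commit):

from util import download_url, save_stdin_source  # as imported at the top of archive.py

def resolve_source(source, stdin_raw_text):
    """Normalize CLI input into a local sources path (sketch; wrapper name is hypothetical)."""
    # a URL argument is fetched and cached locally before parsing
    if source and any(source.startswith(s) for s in ('http://', 'https://', 'ftp://')):
        return download_url(source)  # renamed to save_remote_source in util.py below
    # piped-in stdin text is persisted to a sources file so it can be re-parsed later
    elif stdin_raw_text:
        return save_stdin_source(stdin_raw_text)
    # otherwise: source is already a local path, or None (resume an existing archive)
    return source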
@@ -213,15 +212,12 @@ if __name__ == '__main__':
     # Step 2: Write new index
     write_links_index(out_dir=out_dir, links=all_links)

-    # Step 3: Verify folder structure is 1:1 with index
-    # cleanup_archive(out_dir, links)
-
-    # Step 4: Run the archive methods for each link
+    # Step 3: Run the archive methods for each link
     if ONLY_NEW:
         update_archive(out_dir, new_links, source=source, resume=resume, append=True)
     else:
         update_archive(out_dir, all_links, source=source, resume=resume, append=True)

-    # Step 5: Re-write links index with updated titles, icons, and resources
+    # Step 4: Re-write links index with updated titles, icons, and resources
     all_links, _ = load_links(archive_path=out_dir)
     write_links_index(out_dir=out_dir, links=all_links)
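Taken together, the renumbering leaves archive.py's main flow as a four-step pipeline. A condensed outline using only the calls visible in the hunks above (the links_to_archive temporary is a condensation, not in the diff):

# Step 1: parse the links and dedupe them against the existing archive
all_links, new_links = load_links(archive_path=out_dir, import_path=source)
# Step 2: write the new index
write_links_index(out_dir=out_dir, links=all_links)
# Step 3: run the archive methods for each link (new links only if ONLY_NEW is set)
links_to_archive = new_links if ONLY_NEW else all_links
update_archive(out_dir, links_to_archive, source=source, resume=resume, append=True)
# Step 4: re-write the links index with updated titles, icons, and resources
all_links, _ = load_links(archive_path=out_dir)
write_links_index(out_dir=out_dir, links=all_links)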
util.py

@@ -205,7 +205,7 @@ def pretty_path(path):
     return path.replace(REPO_DIR + '/', '')


-def save_source(raw_text):
+def save_stdin_source(raw_text):
     if not os.path.exists(SOURCES_DIR):
         os.makedirs(SOURCES_DIR)

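The hunk shows only the rename and the SOURCES_DIR guard. A plausible completion of the body, assuming a timestamped filename and that SOURCES_DIR comes from config (neither is visible in this diff):

import os
from datetime import datetime
from config import SOURCES_DIR  # assumed location of SOURCES_DIR

def save_stdin_source(raw_text):
    """save piped-in stdin text to a file under SOURCES_DIR (sketch)"""
    if not os.path.exists(SOURCES_DIR):
        os.makedirs(SOURCES_DIR)

    # hypothetical filename scheme; the real one is truncated out of this diff
    ts = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    source_path = os.path.join(SOURCES_DIR, 'stdin-{}.txt'.format(ts))
    with open(source_path, 'w', encoding='utf-8') as f:
        f.write(raw_text)
    return source_path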
@@ -233,7 +233,7 @@ def fetch_page_content(url, timeout=TIMEOUT):
     return resp.read().decode(encoding)


-def download_url(url, timeout=TIMEOUT):
+def save_remote_source(url, timeout=TIMEOUT):
     """download a given url's content into downloads/domain.txt"""

     if not os.path.exists(SOURCES_DIR):
@@ -265,7 +265,7 @@ def download_url(url, timeout=TIMEOUT):

     with open(source_path, 'w', encoding='utf-8') as f:
         f.write(downloaded_xml)

     print(' > {}'.format(pretty_path(source_path)))

     return source_path
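For completeness, a sketch of the renamed save_remote_source reconstructed from the lines visible above. fetch_page_content and pretty_path appear in the hunks; the domain-based filename is an assumption inferred from the docstring, and TIMEOUT/SOURCES_DIR are taken to come from the surrounding module:

import os

def save_remote_source(url, timeout=TIMEOUT):
    """download a given url's content into downloads/domain.txt (sketch)"""
    if not os.path.exists(SOURCES_DIR):
        os.makedirs(SOURCES_DIR)

    # derive a filename from the URL's domain (assumed from the docstring)
    domain = url.split('//', 1)[-1].split('/', 1)[0]
    source_path = os.path.join(SOURCES_DIR, '{}.txt'.format(domain))

    downloaded_xml = fetch_page_content(url, timeout=timeout)
    with open(source_path, 'w', encoding='utf-8') as f:
        f.write(downloaded_xml)

    print(' > {}'.format(pretty_path(source_path)))
    return source_path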