switch to atomic disk writes for all index operations

This commit is contained in:
Nick Sweeting 2019-03-27 18:24:57 -04:00
parent a214bd7c02
commit d2a34f2602
2 changed files with 47 additions and 33 deletions

View file

@ -25,6 +25,7 @@ from .util import (
enforce_types, enforce_types,
TimedProgress, TimedProgress,
copy_and_overwrite, copy_and_overwrite,
atomic_write,
) )
from .parse import parse_links from .parse import parse_links
from .links import validate_links from .links import validate_links
@ -113,11 +114,7 @@ def write_json_links_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
'updated': datetime.now(), 'updated': datetime.now(),
'links': links, 'links': links,
} }
atomic_write(index_json, path)
with open(path, 'w', encoding='utf-8') as f:
json.dump(index_json, f, indent=4, cls=ExtendedEncoder)
chmod_file(path)
@enforce_types @enforce_types
@ -141,15 +138,17 @@ def parse_json_links_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
@enforce_types @enforce_types
def write_html_links_index(out_dir: str, links: List[Link], finished: bool=False) -> None: def write_html_links_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
"""write the html link index to a given path""" """write the html link index to a given path"""
path = os.path.join(out_dir, 'index.html') path = os.path.join(out_dir, 'index.html')
copy_and_overwrite(os.path.join(TEMPLATES_DIR, 'static'), os.path.join(out_dir, 'static')) copy_and_overwrite(
os.path.join(TEMPLATES_DIR, 'static'),
os.path.join(out_dir, 'static'),
)
with open(os.path.join(out_dir, 'robots.txt'), 'w+') as f: atomic_write('User-agent: *\nDisallow: /', os.path.join(out_dir, 'robots.txt'))
f.write('User-agent: *\nDisallow: /')
with open(os.path.join(TEMPLATES_DIR, 'index.html'), 'r', encoding='utf-8') as f: with open(os.path.join(TEMPLATES_DIR, 'index.html'), 'r', encoding='utf-8') as f:
index_html = f.read() index_html = f.read()
@ -187,10 +186,8 @@ def write_html_links_index(out_dir: str, links: List[Link], finished: bool=False
'status': 'finished' if finished else 'running', 'status': 'finished' if finished else 'running',
} }
with open(path, 'w', encoding='utf-8') as f: atomic_write(Template(index_html).substitute(**template_vars), path)
f.write(Template(index_html).substitute(**template_vars))
chmod_file(path)
@enforce_types @enforce_types
@ -225,8 +222,7 @@ def patch_links_index(link: Link, out_dir: str=OUTPUT_DIR) -> None:
html[idx] = '<span>{}</span>'.format(successful) html[idx] = '<span>{}</span>'.format(successful)
break break
with open(html_path, 'w') as f: atomic_write('\n'.join(html), html_path)
f.write('\n'.join(html))
### Individual link index ### Individual link index
@ -246,7 +242,7 @@ def write_json_link_index(link: Link, link_dir: Optional[str]=None) -> None:
link_dir = link_dir or link.link_dir link_dir = link_dir or link.link_dir
path = os.path.join(link_dir, 'index.json') path = os.path.join(link_dir, 'index.json')
chmod_file(path) atomic_write(link._asdict(), path)
@enforce_types @enforce_types
@ -279,23 +275,22 @@ def write_html_link_index(link: Link, link_dir: Optional[str]=None) -> None:
with open(os.path.join(TEMPLATES_DIR, 'link_index.html'), 'r', encoding='utf-8') as f: with open(os.path.join(TEMPLATES_DIR, 'link_index.html'), 'r', encoding='utf-8') as f:
link_html = f.read() link_html = f.read()
path = os.path.join(out_dir, 'index.html') path = os.path.join(link_dir, 'index.html')
with open(path, 'w', encoding='utf-8') as f: html_index = Template(link_html).substitute({
f.write(Template(link_html).substitute({ **derived_link_info(link),
**derived_link_info(link), 'title': (
'title': ( link.title
link.title or (link.base_url if link.is_archived else TITLE_LOADING_MSG)
or (link.base_url if link.is_archived else TITLE_LOADING_MSG) ),
), 'archive_url': urlencode(
'archive_url': urlencode( wget_output_path(link)
wget_output_path(link) or (link.domain if link.is_archived else 'about:blank')
or (link.domain if link.is_archived else 'about:blank') ),
), 'extension': link.extension or 'html',
'extension': link.extension or 'html', 'tags': link.tags or 'untagged',
'tags': link.tags or 'untagged', 'status': 'archived' if link.is_archived else 'not yet archived',
'status': 'archived' if link.is_archived else 'not yet archived', 'status_color': 'success' if link.is_archived else 'danger',
'status_color': 'success' if link.is_archived else 'danger', })
}))
chmod_file(path) atomic_write(html_index, path)

View file

@ -670,3 +670,22 @@ class ExtendedEncoder(JSONEncoder):
return tuple(obj) return tuple(obj)
return JSONEncoder.default(self, obj) return JSONEncoder.default(self, obj)
def atomic_write(contents: Union[dict, str], path: str):
"""Safe atomic file write and swap using a tmp file"""
try:
tmp_file = '{}.tmp'.format(path)
with open(tmp_file, 'w+', encoding='utf-8') as f:
if isinstance(contents, dict):
json.dump(contents, f, indent=4, cls=ExtendedEncoder)
else:
f.write(contents)
os.fsync(f.fileno())
os.rename(tmp_file, path)
chmod_file(path)
finally:
if os.path.exists(tmp_file):
os.remove(tmp_file)