use derived_link_info more consistently in index generation

This commit is contained in:
Nick Sweeting 2019-03-03 14:10:11 -05:00
parent 63e6ed8ca9
commit 14e66a6909
5 changed files with 42 additions and 47 deletions

View file

@ -175,17 +175,8 @@ def write_html_link_index(out_dir, link):
with open(path, 'w', encoding='utf-8') as f:
f.write(Template(link_html).substitute({
**link,
**derived_link_info(link),
**link['latest'],
'title': link['title'] or link['url'],
'type': link['type'] or 'website',
'tags': link['tags'] or 'untagged',
'bookmarked': datetime.fromtimestamp(float(link['timestamp'])).strftime('%Y-%m-%d %H:%M'),
'updated': datetime.fromtimestamp(float(link['updated'])).strftime('%Y-%m-%d %H:%M'),
'bookmarked_ts': link['timestamp'],
'updated_ts': link['updated'],
'archive_org': link['latest'].get('archive_org') or 'https://web.archive.org/save/{}'.format(link['url']),
'wget': link['latest'].get('wget') or wget_output_path(link),
}))
chmod_file(path)

View file

@ -8,9 +8,9 @@
<td style="text-align: left"><a href="$archive_url" style="font-size:1.4em;text-decoration:none;color:black;" title="$title">
$title <small style="background-color: #eee;border-radius:4px; float:right">$tags</small>
</td>
<td><a href="$screenshot_link" title="Screenshot">🖼</a></td>
<td><a href="$pdf_link" title="PDF">📜</a></td>
<td><a href="$dom_link" title="DOM">📄</a></td>
<td><a href="$screenshot_url" title="Screenshot">🖼</a></td>
<td><a href="$pdf_url" title="PDF">📜</a></td>
<td><a href="$dom_url" title="DOM">📄</a></td>
<td><a href="$archive_org_url" title="Archive.org">🏛</a></td>
<td style="text-align: left"><!--🔗 <img src="$google_favicon_url" height="16px">--> <a href="$url">$url</a></td>
</tr>

View file

@ -2,7 +2,6 @@
<head>
<meta charset="utf-8">
<title>$title</title>
</head>
<body>
<header>
@ -19,9 +18,9 @@
Type: $type<br/>
<br/>
Bookmarked:<br/>
$bookmarked<br/>
<span title="$timestamp">$bookmarked</span><br/>
Archived:<br/>
$updated<br/>
<span title="$updated">$updated_date</span><br/>
</div>
<hr/>
<ul>
@ -30,23 +29,23 @@
$base_url<br/>&nbsp;
</li>
<li>
<a href="$wget"><b>Local Archive</b></a><br/>
<a href="$archive_url"><b>Local Archive</b></a><br/>
archive/$timestamp/$domain<br/>&nbsp;
</li>
<li>
<a href="$pdf" id="pdf-btn"><b>PDF</b></a><br/>
<a href="$pdf_url" id="pdf-btn"><b>PDF</b></a><br/>
archive/$timestamp/output.pdf<br/>&nbsp;
</li>
<li>
<a href="$screenshot"><b>Screenshot</b></a><br/>
<a href="$screenshot_url"><b>Screenshot</b></a><br/>
archive/$timestamp/screenshot.png<br/>&nbsp;
</li>
<li>
<a href="$dom"><b>HTML</b></a><br/>
<a href="$dom_url"><b>HTML</b></a><br/>
archive/$timestamp/output.html<br/>&nbsp;
</li>
<li>
<a href="$archive_org"><b>Archive.Org</b></a><br/>
<a href="$archive_org_url"><b>Archive.Org</b></a><br/>
web.archive.org/web/$base_url<br/>&nbsp;
</li>
</ul>

View file

@ -172,9 +172,9 @@
<div class="site-header container-fluid">
<div class="row archive-page-header">
<div class="col-lg-4 alert well">
Bookmarked: <small title="Timestamp: $bookmarked_ts">$bookmarked</small>
Bookmarked: <small title="Timestamp: $timestamp">$date</small>
&nbsp; | &nbsp;
Last updated: <small title="Timestamp: $updated_ts">$updated</small>
Last updated: <small title="Timestamp: $updated">$updated_date</small>
</div>
<div class="col-lg-4 alert well">
Type:
@ -193,48 +193,48 @@
<hr/>
<div class="col-lg-2">
<div class="card selected-card">
<iframe class="card-img-top" src="$wget" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<iframe class="card-img-top" src="$archive_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$wget" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="$archive_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$wget" target="preview"><h4 class="card-title">Local Archive</h4></a>
<a href="$archive_url" target="preview"><h4 class="card-title">Local Archive</h4></a>
<p class="card-text">archive/$domain</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$dom" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<iframe class="card-img-top" src="$dom_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$dom" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="$dom_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$dom" target="preview"><h4 class="card-title">HTML</h4></a>
<a href="$dom_url" target="preview"><h4 class="card-title">HTML</h4></a>
<p class="card-text">archive/output.html</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$pdf"></iframe>
<iframe class="card-img-top" src="$pdf_url"></iframe>
<div class="card-body">
<a href="$pdf" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="$pdf_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$pdf" target="preview" id="pdf-btn"><h4 class="card-title">PDF</h4></a>
<a href="$pdf_url" target="preview" id="pdf-btn"><h4 class="card-title">PDF</h4></a>
<p class="card-text">archive/output.pdf</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$screenshot" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<iframe class="card-img-top" src="$screenshot_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$screenshot" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="$screenshot_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$screenshot" target="preview"><h4 class="card-title">Screenshot</h4></a>
<a href="$screenshot_url" target="preview"><h4 class="card-title">Screenshot</h4></a>
<p class="card-text">archive/screenshot.png</p>
</div>
</div>
@ -253,19 +253,19 @@
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$archive_org" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<iframe class="card-img-top" src="$archive_org_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$archive_org" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<a href="$archive_org_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$archive_org" target="preview"><h4 class="card-title">Archive.Org</h4></a>
<a href="$archive_org_url" target="preview"><h4 class="card-title">Archive.Org</h4></a>
<p class="card-text">web.archive.org/web/...</p>
</div>
</div>
</div>
</div>
</div>
<iframe sandbox="allow-same-origin allow-scripts allow-forms" class="full-page-iframe" src="$wget" name="preview"></iframe>
<iframe sandbox="allow-same-origin allow-scripts allow-forms" class="full-page-iframe" src="$archive_url" name="preview"></iframe>
<script
src="https://code.jquery.com/jquery-3.2.1.slim.min.js"

View file

@ -596,14 +596,19 @@ def derived_link_info(link):
url = link['url']
to_date_str = lambda ts: datetime.fromtimestamp(Decimal(ts)).strftime('%Y-%m-%d %H:%M')
extended_info = {
**link,
'title': link['title'] or base_url(url),
'date': datetime.fromtimestamp(Decimal(link['timestamp'])).strftime('%Y-%m-%d %H:%M'),
'date': to_date_str(link['timestamp']),
'updated_date': to_date_str(link['updated']) if 'updated' in link else None,
'base_url': base_url(url),
'domain': domain(url),
'basename': basename(url),
'path': path(url),
'type': link['type'] or 'website',
'tags': link['tags'] or 'untagged',
}
# Archive Method Output URLs
@ -614,9 +619,9 @@ def derived_link_info(link):
'files_url': 'archive/{timestamp}/index.html'.format(**extended_info),
'archive_url': 'archive/{}/{}'.format(link['timestamp'], wget_output_path(link) or 'index.html'),
'warc_url': 'archive/{timestamp}/warc'.format(**extended_info),
'pdf_link': 'archive/{timestamp}/output.pdf'.format(**extended_info),
'screenshot_link': 'archive/{timestamp}/screenshot.png'.format(**extended_info),
'dom_link': 'archive/{timestamp}/output.html'.format(**extended_info),
'pdf_url': 'archive/{timestamp}/output.pdf'.format(**extended_info),
'screenshot_url': 'archive/{timestamp}/screenshot.png'.format(**extended_info),
'dom_url': 'archive/{timestamp}/output.html'.format(**extended_info),
'archive_org_url': 'https://web.archive.org/web/{base_url}'.format(**extended_info),
'git_url': 'archive/{timestamp}/git'.format(**extended_info),
'media_url': 'archive/{timestamp}/media'.format(**extended_info),
@ -627,11 +632,11 @@ def derived_link_info(link):
# wget, screenshot, & pdf urls all point to the same file
if link['type'] in ('PDF', 'image'):
extended_info.update({
'title': basename(link['url']),
'archive_url': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'pdf_link': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'screenshot_link': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'dom_link': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'title': link['title'] or basename(link['url']),
'pdf_url': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'screenshot_url': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'dom_url': 'archive/{timestamp}/{base_url}'.format(**extended_info),
})
return extended_info