fix relative links from index files

This commit is contained in:
Nick Sweeting 2019-03-08 17:46:14 -05:00
parent ce13a57a2c
commit 2e10f57f6e
6 changed files with 335 additions and 392 deletions

View file

@ -48,18 +48,21 @@ CHROME_BINARY = os.getenv('CHROME_BINARY', None)
REPO_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
OUTPUT_DIR = os.path.abspath(os.getenv('OUTPUT_DIR', os.path.join(REPO_DIR, 'output')))
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, 'archive')
SOURCES_DIR = os.path.join(OUTPUT_DIR, 'sources')
PYTHON_PATH = os.path.join(REPO_DIR, 'archivebox')
TEMPLATES_DIR = os.path.join(PYTHON_PATH, 'templates')
# ******************************************************************************
# ********************** Do not edit below this point **************************
# ******************************************************************************
CHROME_SANDBOX = os.getenv('CHROME_SANDBOX', 'True' ).lower() == 'true'
ARCHIVE_DIR_NAME = 'archive'
SOURCES_DIR_NAME = 'sources'
ARCHIVE_DIR = os.path.join(OUTPUT_DIR, ARCHIVE_DIR_NAME)
SOURCES_DIR = os.path.join(OUTPUT_DIR, SOURCES_DIR_NAME)
PYTHON_PATH = os.path.join(REPO_DIR, 'archivebox')
TEMPLATES_DIR = os.path.join(PYTHON_PATH, 'templates')
CHROME_SANDBOX = os.getenv('CHROME_SANDBOX', 'True').lower() == 'true'
USE_CHROME = FETCH_PDF or FETCH_SCREENSHOT or FETCH_DOM
USE_WGET = FETCH_WGET or FETCH_WGET_REQUISITES or FETCH_WARC

View file

@ -164,7 +164,7 @@ def parse_json_link_index(out_dir):
def write_html_link_index(out_dir, link):
check_link_structure(link)
with open(os.path.join(TEMPLATES_DIR, 'link_index_fancy.html'), 'r', encoding='utf-8') as f:
with open(os.path.join(TEMPLATES_DIR, 'link_index.html'), 'r', encoding='utf-8') as f:
link_html = f.read()
path = os.path.join(out_dir, 'index.html')

View file

@ -1,16 +1,16 @@
<tr>
<td title="Bookmarked timestamp: $timestamp">$bookmarked_date</td>
<td>
<a href="$files_url" title="Link Index">
<img src="$favicon_url" onerror="this.src='static/spinner.gif'" class="link-favicon">
<a href="$link_dir/$index_url" title="Link Index">
<img src="$link_dir/$favicon_url" onerror="this.src='static/spinner.gif'" class="link-favicon">
</a>
</td>
<td style="text-align: left"><a href="$archive_url" style="font-size:1.4em;text-decoration:none;color:black;" title="$title">
<td style="text-align: left"><a href="$link_dir/$archive_url" style="font-size:1.4em;text-decoration:none;color:black;" title="$title">
$title <small style="background-color: #eee;border-radius:4px; float:right">$tags</small>
</td>
<td><a href="$screenshot_url" title="Screenshot">🖼</a></td>
<td><a href="$pdf_url" title="PDF">📜</a></td>
<td><a href="$dom_url" title="DOM">📄</a></td>
<td><a href="$link_dir/$screenshot_url" title="Screenshot">🖼</a></td>
<td><a href="$link_dir/$pdf_url" title="PDF">📜</a></td>
<td><a href="$link_dir/$dom_url" title="DOM">📄</a></td>
<td><a href="$archive_org_url" title="Archive.org">🏛</a></td>
<td style="text-align: left"><!--🔗 <img src="$google_favicon_url" height="16px">--> <a href="$url">$url</a></td>
</tr>

View file

@ -2,61 +2,318 @@
<head>
<meta charset="utf-8">
<title>$title</title>
<style>
html, body {
width: 100%;
height: 100%;
}
body {
background-color: #ddd;
}
header {
width: 100%;
height: 90px;
background-color: #aa1e55;
margin: 0px;
text-align: center;
color: white;
}
header h1 {
padding-top: 5px;
padding-bottom: 5px;
margin: 0px;
font-weight: 200;
font-family: "Gill Sans", Helvetica, sans-serif;
font-size: calc(16px + 1vw);
}
.collapse-icon {
float: right;
color: black;
width: 126px;
font-size: 0.8em;
margin-top: 20px;
margin-right: 0px;
margin-left: -35px;
}
.nav-icon img {
float: left;
display: block;
margin-right: 13px;
color: black;
height: 53px;
margin-top: 12px;
margin-left: 10px;
}
.nav-icon img:hover {
opacity: 0.5;
}
.title-url {
color: black;
display: block;
width: 75%;
white-space: nowrap;
overflow: hidden;
margin: auto;
}
.archive-page-header {
margin-top: 5px;
margin-bottom: 5px;
}
.archive-page-header .alert {
margin-bottom: 0px;
}
h1 small {
opacity: 0.4;
font-size: 0.6em;
}
h1 small:hover {
opacity: 0.8;
}
.card {
box-shadow: 2px 3px 14px 0px rgba(0,0,0,0.02);
}
.card h4 {
font-size: 1.4vw;
}
.card-body {
font-size: 1vw;
padding-top: 1.2vw;
padding-left: 1vw;
padding-right: 1vw;
padding-bottom: 1vw;
line-height: 1.1;
word-wrap: break-word;
max-height: 102px;
overflow: hidden;
}
.card-img-top {
border: 0px;
padding: 0px;
margin: 0px;
overflow: hidden;
opacity: 0.8;
border-top: 1px solid gray;
border-radius: 3px;
border-bottom: 1px solid #ddd;
height: 430px;
width: 400%;
margin-bottom: -330px;
transform: scale(0.25);
transform-origin: 0 0;
}
.full-page-iframe {
border-top: 1px solid #ddd;
width: 100%;
height: 69vh;
margin: 0px;
border: 0px;
border-top: 3px solid #aa1e55;
}
.card.selected-card {
border: 2px solid orange;
box-shadow: 0px -6px 13px 1px rgba(0,0,0,0.05);
}
.iframe-large {
height: 93%;
margin-top: -10px;
}
img.external {
height: 30px;
margin-right: -10px;
padding: 3px;
border-radius: 4px;
vertical-align: middle;
border: 4px solid rgba(0,0,0,0);
}
img.external:hover {
border: 4px solid green;
}
@media(max-width: 1092px) {
iframe {
display: none;
}
}
@media(max-width: 728px) {
.card h4 {
font-size: 5vw;
}
.card-body {
font-size: 4vw;
}
.card {
margin-bottom: 5px;
}
header > h1 > a.collapse-icon, header > h1 > a.nav-icon {
display: none;
}
}
</style>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/css/bootstrap.min.css" integrity="sha384-rwoIResjU2yc3z8GV/NPeZWAv56rSmLldC3R/AZzGRnGxQQKnKkoFVhFQhNUwEyJ" crossorigin="anonymous">
</head>
<body>
<header>
<h1>
<h1 class="page-title">
<a href="../../index.html" class="nav-icon" title="Go to Main Index...">
<img src="../../static/archive.png" alt="Archive Icon">
</a>
<a href="#" class="collapse-icon" style="text-decoration: none" title="Toggle info panel...">
</a>
<img src="$favicon_url" height="20px"> $title<br/>
<a href="$url" class="title-url">
<small>$base_url</small>
</a>
</h1>
</header>
<hr/>
<div>
Tags: $tags<br/>
Type: $type<br/>
<br/>
Bookmarked:<br/>
<span title="$timestamp">$bookmarked_date</span><br/>
Archived:<br/>
<span title="$updated">$updated_date</span><br/>
<div class="site-header container-fluid">
<div class="row archive-page-header">
<div class="col-lg-4 alert well">
Bookmarked: <small title="Timestamp: $timestamp">$bookmarked_date</small>
&nbsp; | &nbsp;
Last updated: <small title="Timestamp: $updated">$updated_date</small>
</div>
<div class="col-lg-4 alert well">
Type:
<span class="badge badge-default">$type</span>
&nbsp; | &nbsp;
Tags:
<span class="badge badge-success">$tags</span>
</div>
<div class="col-lg-4 alert well">
Download:
<a href="index.json" title="JSON summary of archived link.">JSON</a> |
<a href="." title="Webserver-provided index of files directory.">Files</a> |
<a href="media/" title="Audio, Video, and Subtitle files.">Media</a> |
<a href="git/" title="Any git repos at the url">Git Code</a>
</div>
<hr/>
<div class="col-lg-2">
<div class="card selected-card">
<iframe class="card-img-top" src="$archive_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$archive_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$archive_url" target="preview"><h4 class="card-title">Local Archive</h4></a>
<p class="card-text">archive/$domain</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$dom_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$dom_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$dom_url" target="preview"><h4 class="card-title">HTML</h4></a>
<p class="card-text">archive/output.html</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$pdf_url"></iframe>
<div class="card-body">
<a href="$pdf_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$pdf_url" target="preview" id="pdf-btn"><h4 class="card-title">PDF</h4></a>
<p class="card-text">archive/output.pdf</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$screenshot_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$screenshot_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$screenshot_url" target="preview"><h4 class="card-title">Screenshot</h4></a>
<p class="card-text">archive/screenshot.png</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$url" target="preview"><h4 class="card-title">Original</h4></a>
<p class="card-text">$domain</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$archive_org_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$archive_org_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$archive_org_url" target="preview"><h4 class="card-title">Archive.Org</h4></a>
<p class="card-text">web.archive.org/web/...</p>
</div>
</div>
</div>
</div>
</div>
<hr/>
<ul>
<li>
<a href="$url"><b>Original</b></a><br/>
$base_url<br/>&nbsp;
</li>
<li>
<a href="$archive_url"><b>Local Archive</b></a><br/>
archive/$timestamp/$domain<br/>&nbsp;
</li>
<li>
<a href="$pdf_url" id="pdf-btn"><b>PDF</b></a><br/>
archive/$timestamp/output.pdf<br/>&nbsp;
</li>
<li>
<a href="$screenshot_url"><b>Screenshot</b></a><br/>
archive/$timestamp/screenshot.png<br/>&nbsp;
</li>
<li>
<a href="$dom_url"><b>HTML</b></a><br/>
archive/$timestamp/output.html<br/>&nbsp;
</li>
<li>
<a href="$archive_org_url"><b>Archive.Org</b></a><br/>
web.archive.org/web/$base_url<br/>&nbsp;
</li>
</ul>
<footer>
<hr/>
<a href="index.json">JSON</a> | <a href=".">Files</a>
<hr/>
<a href="./../../index.html" class="nav-icon" title="Archived Sites">
<img src="https://nicksweeting.com/images/archive.png" alt="Archive Icon" height="20px">
ArchiveBox: Link Index
</a>
</footer>
<iframe sandbox="allow-same-origin allow-scripts allow-forms" class="full-page-iframe" src="$archive_url" name="preview"></iframe>
<script
src="https://code.jquery.com/jquery-3.2.1.slim.min.js"
integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g="
crossorigin="anonymous"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/js/bootstrap.min.js" integrity="sha384-vBWWzlZJ8ea9aCX4pEW3rVHjgjt7zpkNpZk+02D9phzyeVkE+jo0ieGizqPLForn" crossorigin="anonymous"></script>
<script>
// Inline behaviour for the per-link index page: card selection, iframe
// sandbox handling for PDFs, the collapsible info header, and a small-screen
// fallback for the preview links.

// Highlight whichever preview card was clicked most recently.
jQuery('.card').on('click', function(e) {
    jQuery('.selected-card').removeClass('selected-card')
    jQuery(e.target).closest('.card').addClass('selected-card')
})

// Before a card link loads into the main preview iframe, drop the sandbox
// attribute for PDFs and restore it for every other kind of output.
jQuery('.card a[target=preview]').on('click', function(e) {
    const previewFrame = jQuery('.full-page-iframe')[0]
    if (e.currentTarget.href.endsWith('.pdf')) {
        previewFrame.removeAttribute('sandbox')
    } else {
        previewFrame.sandbox = "allow-same-origin allow-scripts allow-forms"
    }
    // returning true lets the browser follow the link into the target frame
    return true
})

// un-sandbox iframes showing pdfs (required to display pdf viewer)
// .each() rather than .map(): this runs purely for its side effects and the
// collection .map() would build was never used.
jQuery('iframe').each(function() {
    if (this.src.endsWith('.pdf')) {
        this.removeAttribute('sandbox')
        // re-assign src so the frame reloads without the sandbox applied
        this.src = this.src
    }
})

// Toggle the info header when the collapse icon is clicked; the icon's own
// text (▾ expanded / ▸ collapsed) doubles as the state flag.
// NOTE(review): the first click only collapses if the icon's initial text
// contains ▾ -- confirm the template markup seeds it.
jQuery('.collapse-icon').on('click', function() {
    if (jQuery('.collapse-icon').text().includes('▾')) {
        jQuery('.collapse-icon').text('▸')
        jQuery('.site-header').hide()
        jQuery('.full-page-iframe').addClass('iframe-large')
    } else {
        jQuery('.collapse-icon').text('▾')
        jQuery('.site-header').show()
        jQuery('.full-page-iframe').removeClass('iframe-large')
    }
    return true
})

// On narrow viewports the stylesheet hides every iframe
// (@media(max-width: 1092px)), so open card links in the current tab instead
// of the hidden preview frame. Evaluating the same media query keeps this in
// step with the CSS; the previous `innerWidth < 1091` check missed widths
// 1091-1092, where the iframes were hidden but links still targeted them.
if (window.matchMedia('(max-width: 1092px)').matches) {
    jQuery('.card a[target=preview]').attr('target', '_self')
}
</script>
</body>
</html>

View file

@ -1,319 +0,0 @@
<html>
<head>
<meta charset="utf-8">
<title>$title</title>
<style>
html, body {
width: 100%;
height: 100%;
}
body {
background-color: #ddd;
}
header {
width: 100%;
height: 90px;
background-color: #aa1e55;
margin: 0px;
text-align: center;
color: white;
}
header h1 {
padding-top: 5px;
padding-bottom: 5px;
margin: 0px;
font-weight: 200;
font-family: "Gill Sans", Helvetica, sans-serif;
font-size: calc(16px + 1vw);
}
.collapse-icon {
float: right;
color: black;
width: 126px;
font-size: 0.8em;
margin-top: 20px;
margin-right: 0px;
margin-left: -35px;
}
.nav-icon img {
float: left;
display: block;
margin-right: 13px;
color: black;
height: 53px;
margin-top: 12px;
margin-left: 10px;
}
.nav-icon img:hover {
opacity: 0.5;
}
.title-url {
color: black;
display: block;
width: 75%;
white-space: nowrap;
overflow: hidden;
margin: auto;
}
.archive-page-header {
margin-top: 5px;
margin-bottom: 5px;
}
.archive-page-header .alert {
margin-bottom: 0px;
}
h1 small {
opacity: 0.4;
font-size: 0.6em;
}
h1 small:hover {
opacity: 0.8;
}
.card {
box-shadow: 2px 3px 14px 0px rgba(0,0,0,0.02);
}
.card h4 {
font-size: 1.4vw;
}
.card-body {
font-size: 1vw;
padding-top: 1.2vw;
padding-left: 1vw;
padding-right: 1vw;
padding-bottom: 1vw;
line-height: 1.1;
word-wrap: break-word;
max-height: 102px;
overflow: hidden;
}
.card-img-top {
border: 0px;
padding: 0px;
margin: 0px;
overflow: hidden;
opacity: 0.8;
border-top: 1px solid gray;
border-radius: 3px;
border-bottom: 1px solid #ddd;
height: 430px;
width: 400%;
margin-bottom: -330px;
transform: scale(0.25);
transform-origin: 0 0;
}
.full-page-iframe {
border-top: 1px solid #ddd;
width: 100%;
height: 69vh;
margin: 0px;
border: 0px;
border-top: 3px solid #aa1e55;
}
.card.selected-card {
border: 2px solid orange;
box-shadow: 0px -6px 13px 1px rgba(0,0,0,0.05);
}
.iframe-large {
height: 93%;
margin-top: -10px;
}
img.external {
height: 30px;
margin-right: -10px;
padding: 3px;
border-radius: 4px;
vertical-align: middle;
border: 4px solid rgba(0,0,0,0);
}
img.external:hover {
border: 4px solid green;
}
@media(max-width: 1092px) {
iframe {
display: none;
}
}
@media(max-width: 728px) {
.card h4 {
font-size: 5vw;
}
.card-body {
font-size: 4vw;
}
.card {
margin-bottom: 5px;
}
header > h1 > a.collapse-icon, header > h1 > a.nav-icon {
display: none;
}
}
</style>
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/css/bootstrap.min.css" integrity="sha384-rwoIResjU2yc3z8GV/NPeZWAv56rSmLldC3R/AZzGRnGxQQKnKkoFVhFQhNUwEyJ" crossorigin="anonymous">
</head>
<body>
<header>
<h1 class="page-title">
<a href="../../index.html" class="nav-icon" title="Go to Main Index...">
<img src="../../static/archive.png" alt="Archive Icon">
</a>
<a href="#" class="collapse-icon" style="text-decoration: none" title="Toggle info panel...">
</a>
<img src="$favicon_url" height="20px"> $title<br/>
<a href="$url" class="title-url">
<small>$base_url</small>
</a>
</h1>
</header>
<div class="site-header container-fluid">
<div class="row archive-page-header">
<div class="col-lg-4 alert well">
Bookmarked: <small title="Timestamp: $timestamp">$bookmarked_date</small>
&nbsp; | &nbsp;
Last updated: <small title="Timestamp: $updated">$updated_date</small>
</div>
<div class="col-lg-4 alert well">
Type:
<span class="badge badge-default">$type</span>
&nbsp; | &nbsp;
Tags:
<span class="badge badge-success">$tags</span>
</div>
<div class="col-lg-4 alert well">
Download:
<a href="index.json" title="JSON summary of archived link.">JSON</a> |
<a href="." title="Webserver-provided index of files directory.">Files</a> |
<a href="media/" title="Audio, Video, and Subtitle files.">Media</a> |
<a href="git/" title="Any git repos at the url">Git Code</a>
</div>
<hr/>
<div class="col-lg-2">
<div class="card selected-card">
<iframe class="card-img-top" src="$archive_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$archive_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$archive_url" target="preview"><h4 class="card-title">Local Archive</h4></a>
<p class="card-text">archive/$domain</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$dom_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$dom_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$dom_url" target="preview"><h4 class="card-title">HTML</h4></a>
<p class="card-text">archive/output.html</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$pdf_url"></iframe>
<div class="card-body">
<a href="$pdf_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$pdf_url" target="preview" id="pdf-btn"><h4 class="card-title">PDF</h4></a>
<p class="card-text">archive/output.pdf</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$screenshot_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$screenshot_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$screenshot_url" target="preview"><h4 class="card-title">Screenshot</h4></a>
<p class="card-text">archive/screenshot.png</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$url" target="preview"><h4 class="card-title">Original</h4></a>
<p class="card-text">$domain</p>
</div>
</div>
</div>
<div class="col-lg-2">
<div class="card">
<iframe class="card-img-top" src="$archive_org_url" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
<div class="card-body">
<a href="$archive_org_url" style="float:right" title="Open in new tab..." target="_blank" rel="noopener">
<img src="../../static/external.png" class="external"/>
</a>
<a href="$archive_org_url" target="preview"><h4 class="card-title">Archive.Org</h4></a>
<p class="card-text">web.archive.org/web/...</p>
</div>
</div>
</div>
</div>
</div>
<iframe sandbox="allow-same-origin allow-scripts allow-forms" class="full-page-iframe" src="$archive_url" name="preview"></iframe>
<script
src="https://code.jquery.com/jquery-3.2.1.slim.min.js"
integrity="sha256-k2WSCIexGzOj3Euiig+TlR8gA0EmPjuc79OEeY5L45g="
crossorigin="anonymous"></script>
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0-alpha.6/js/bootstrap.min.js" integrity="sha384-vBWWzlZJ8ea9aCX4pEW3rVHjgjt7zpkNpZk+02D9phzyeVkE+jo0ieGizqPLForn" crossorigin="anonymous"></script>
<script>
// Inline behaviour for the per-link index page: card selection, iframe
// sandbox handling for PDFs, the collapsible info header, and a small-screen
// fallback for the preview links.

// Highlight whichever preview card was clicked most recently.
jQuery('.card').on('click', function(e) {
    jQuery('.selected-card').removeClass('selected-card')
    jQuery(e.target).closest('.card').addClass('selected-card')
})

// Before a card link loads into the main preview iframe, drop the sandbox
// attribute for PDFs and restore it for every other kind of output.
jQuery('.card a[target=preview]').on('click', function(e) {
    const previewFrame = jQuery('.full-page-iframe')[0]
    if (e.currentTarget.href.endsWith('.pdf')) {
        previewFrame.removeAttribute('sandbox')
    } else {
        previewFrame.sandbox = "allow-same-origin allow-scripts allow-forms"
    }
    // returning true lets the browser follow the link into the target frame
    return true
})

// un-sandbox iframes showing pdfs (required to display pdf viewer)
// .each() rather than .map(): this runs purely for its side effects and the
// collection .map() would build was never used.
jQuery('iframe').each(function() {
    if (this.src.endsWith('.pdf')) {
        this.removeAttribute('sandbox')
        // re-assign src so the frame reloads without the sandbox applied
        this.src = this.src
    }
})

// Toggle the info header when the collapse icon is clicked; the icon's own
// text (▾ expanded / ▸ collapsed) doubles as the state flag.
// NOTE(review): the first click only collapses if the icon's initial text
// contains ▾ -- confirm the template markup seeds it.
jQuery('.collapse-icon').on('click', function() {
    if (jQuery('.collapse-icon').text().includes('▾')) {
        jQuery('.collapse-icon').text('▸')
        jQuery('.site-header').hide()
        jQuery('.full-page-iframe').addClass('iframe-large')
    } else {
        jQuery('.collapse-icon').text('▾')
        jQuery('.site-header').show()
        jQuery('.full-page-iframe').removeClass('iframe-large')
    }
    return true
})

// On narrow viewports the stylesheet hides every iframe
// (@media(max-width: 1092px)), so open card links in the current tab instead
// of the hidden preview frame. Evaluating the same media query keeps this in
// step with the CSS; the previous `innerWidth < 1091` check missed widths
// 1091-1092, where the iframes were hidden but links still targeted them.
if (window.matchMedia('(max-width: 1092px)').matches) {
    jQuery('.card a[target=preview]').attr('target', '_self')
}
</script>
</body>
</html>

View file

@ -36,6 +36,7 @@ from config import (
FETCH_GIT,
FETCH_MEDIA,
SUBMIT_ARCHIVE_DOT_ORG,
ARCHIVE_DIR_NAME,
)
### Parsing Helpers
@ -271,7 +272,7 @@ def wget_output_path(link, look_in=None):
if re.search(".+\\.[Hh][Tt][Mm][Ll]?$", f, re.I | re.M)
]
if html_files:
return urlencode(os.path.join('archive', link['timestamp'], *wget_folder, html_files[0]))
return urlencode(os.path.join(*wget_folder, html_files[0]))
return None
@ -389,6 +390,7 @@ def derived_link_info(link):
extended_info = {
**link,
'link_dir': '{}/{}'.format(ARCHIVE_DIR_NAME, link['timestamp']),
'bookmarked_date': to_date_str(link['timestamp']),
'updated_date': to_date_str(link['updated']) if 'updated' in link else None,
'domain': domain(url),
@ -400,17 +402,17 @@ def derived_link_info(link):
# Archive Method Output URLs
extended_info = {
**extended_info,
'favicon_url': 'archive/{timestamp}/favicon.ico'.format(**extended_info),
'index_url': 'index.html',
'favicon_url': 'favicon.ico',
'google_favicon_url': 'https://www.google.com/s2/favicons?domain={domain}'.format(**extended_info),
'files_url': 'archive/{timestamp}/index.html'.format(**extended_info),
'archive_url': wget_output_path(link) or 'archive/{}/index.html'.format(link['timestamp']),
'warc_url': 'archive/{timestamp}/warc'.format(**extended_info),
'pdf_url': 'archive/{timestamp}/output.pdf'.format(**extended_info),
'screenshot_url': 'archive/{timestamp}/screenshot.png'.format(**extended_info),
'dom_url': 'archive/{timestamp}/output.html'.format(**extended_info),
'archive_url': wget_output_path(link) or 'index.html',
'warc_url': 'warc',
'pdf_url': 'output.pdf',
'screenshot_url': 'screenshot.png',
'dom_url': 'output.html',
'archive_org_url': 'https://web.archive.org/web/{base_url}'.format(**extended_info),
'git_url': 'archive/{timestamp}/git'.format(**extended_info),
'media_url': 'archive/{timestamp}/media'.format(**extended_info),
'git_url': 'git',
'media_url': 'media',
}
@ -419,10 +421,10 @@ def derived_link_info(link):
if link['type'] in ('PDF', 'image'):
extended_info.update({
'title': basename(link['url']),
'archive_url': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'pdf_url': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'screenshot_url': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'dom_url': 'archive/{timestamp}/{base_url}'.format(**extended_info),
'archive_url': base_url(url),
'pdf_url': base_url(url),
'screenshot_url': base_url(url),
'dom_url': base_url(url),
})
return extended_info