diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index 3bfc15a7..db48f41c 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -40,6 +40,7 @@ from config import ( CHROME_VERSION, GIT_VERSION, YOUTUBEDL_VERSION, + ONLY_NEW, ) from util import ( enforce_types, @@ -87,33 +88,40 @@ def archive_link(link: Link, page=None) -> Link: link = load_json_link_index(link.link_dir, link) log_link_archiving_started(link.link_dir, link, is_new) + link = link.overwrite(updated=datetime.now()) stats = {'skipped': 0, 'succeeded': 0, 'failed': 0} for method_name, should_run, method_function in ARCHIVE_METHODS: - if method_name not in link.history: - link.history[method_name] = [] - - if should_run(link.link_dir, link): - log_archive_method_started(method_name) + try: + if method_name not in link.history: + link.history[method_name] = [] + + if should_run(link.link_dir, link): + log_archive_method_started(method_name) - result = method_function(link.link_dir, link) + result = method_function(link.link_dir, link) - link.history[method_name].append(result) + link.history[method_name].append(result) - stats[result.status] += 1 - log_archive_method_finished(result) - else: - stats['skipped'] += 1 + stats[result.status] += 1 + log_archive_method_finished(result) + else: + stats['skipped'] += 1 + except Exception as e: + raise Exception('Exception in archive_methods.fetch_{}(Link(url={}))'.format( + method_name, + link.url, + )) from e # print(' ', stats) - link = Link(**{ - **link._asdict(), - 'updated': datetime.now(), - }) - + # If any changes were made, update the link index json and html write_link_index(link.link_dir, link) - patch_links_index(link) + + was_changed = stats['succeeded'] or stats['failed'] + if was_changed: + patch_links_index(link) + log_link_archiving_finished(link.link_dir, link, is_new, stats) except KeyboardInterrupt: diff --git a/archivebox/index.py b/archivebox/index.py index 2bf2b5eb..74e7dd42 100644 --- 
a/archivebox/index.py +++ b/archivebox/index.py @@ -154,7 +154,7 @@ def write_html_links_index(out_dir: str, links: List[Link], finished: bool=False link.title or (link.base_url if link.is_archived else TITLE_LOADING_MSG) ), - 'tags': link.tags or '', + 'tags': (link.tags or '') + (' {}'.format(link.extension) if link.is_static else ''), 'favicon_url': ( os.path.join('archive', link.timestamp, 'favicon.ico') # if link['is_archived'] else 'data:image/gif;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs=' @@ -196,12 +196,11 @@ def patch_links_index(link: Link, out_dir: str=OUTPUT_DIR) -> None: patched_links = [] for saved_link in json_file_links: if saved_link.url == link.url: - patched_links.append(Link(**{ - **saved_link._asdict(), - 'title': title, - 'history': link.history, - 'updated': link.updated, - })) + patched_links.append(saved_link.overwrite( + title=title, + history=link.history, + updated=link.updated, + )) else: patched_links.append(saved_link) @@ -283,7 +282,7 @@ def write_html_link_index(out_dir: str, link: Link) -> None: ), 'extension': link.extension or 'html', 'tags': link.tags or 'untagged', - 'status': 'Archived' if link.is_archived else 'Not yet archived', + 'status': 'archived' if link.is_archived else 'not yet archived', 'status_color': 'success' if link.is_archived else 'danger', })) diff --git a/archivebox/logs.py b/archivebox/logs.py index fd1f0bc5..b2913c18 100644 --- a/archivebox/logs.py +++ b/archivebox/logs.py @@ -131,7 +131,7 @@ def log_link_archiving_started(link_dir: str, link: Link, is_new: bool): print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format( symbol_color=ANSI['green' if is_new else 'black'], - symbol='+' if is_new else '*', + symbol='+' if is_new else '√', now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), title=link.title or link.base_url, **ANSI, diff --git a/archivebox/schema.py b/archivebox/schema.py index fa110653..434f9dc5 100644 --- a/archivebox/schema.py +++ b/archivebox/schema.py @@ 
-59,6 +59,10 @@ class Link: object.__setattr__(self, 'history', cast_history) + def overwrite(self, **kwargs): + """pure functional version of dict.update that returns a new instance""" + return Link(**{**self._asdict(), **kwargs}) + def __eq__(self, other): if not isinstance(other, Link): return NotImplemented @@ -96,6 +100,9 @@ class Link: 'is_static': self.is_static, 'is_archived': self.is_archived, 'num_outputs': self.num_outputs, + 'num_failures': self.num_failures, + 'oldest_archive_date': self.oldest_archive_date, + 'newest_archive_date': self.newest_archive_date, }) return info @@ -152,11 +159,42 @@ class Link: from util import ts_to_date return ts_to_date(self.updated) if self.updated else None + @property + def oldest_archive_date(self) -> Optional[datetime]: + from util import ts_to_date + + most_recent = min( + (result.start_ts + for method in self.history.keys() + for result in self.history[method]), + default=None, + ) + return ts_to_date(most_recent) if most_recent else None + + @property + def newest_archive_date(self) -> Optional[datetime]: + from util import ts_to_date + + most_recent = max( + (result.start_ts + for method in self.history.keys() + for result in self.history[method]), + default=None, + ) + return ts_to_date(most_recent) if most_recent else None + ### Archive Status Helpers @property def num_outputs(self) -> int: return len(tuple(filter(None, self.latest_outputs().values()))) + @property + def num_failures(self) -> int: + return sum(1 + for method in self.history.keys() + for result in self.history[method] + if result.status == 'failed') + @property def is_static(self) -> bool: from util import is_static_file diff --git a/archivebox/templates/index.html b/archivebox/templates/index.html index 264deb4d..f5cf2785 100644 --- a/archivebox/templates/index.html +++ b/archivebox/templates/index.html @@ -13,51 +13,64 @@ padding: 0px; font-family: "Gill Sans", Helvetica, sans-serif; } - header { - background-color: #aa1e55; - color: #1a1a1a; 
- padding: 10px; - padding-top: 0px; - padding-bottom: 15px; - /*height: 40px;*/ + .header-top small { + font-weight: 200; + color: #efefef; } - header h1 { - margin: 7px 0px; - font-size: 35px; - font-weight: 300; - color: #1a1a1a; - } - header h1 img { - height: 44px; - vertical-align: bottom; - } - header a { - text-decoration: none !important; - color: #1a1a1a; - } - .header-center { - margin: auto; - float: none; + + .header-top { + width: 100%; + height: auto; + min-height: 40px; + margin: 0px; text-align: center; - padding-top: 6px; + color: white; + font-size: calc(11px + 0.86vw); + font-weight: 200; + padding: 4px 4px; + border-bottom: 3px solid #aa1e55; + background-color: #aa1e55; } - .header-center small { - color: #eaeaea; - opacity: 0.7; + input[type=search] { + width: 22vw; + border-radius: 4px; + border: 1px solid #aeaeae; + padding: 3px 5px; } - .header-left { - float: left; + .nav > div { + min-height: 30px; } - .header-right { - float: right; - padding-top: 17px; - padding-right: 10px; + .header-top a { + text-decoration: none; + color: rgba(0,0,0,0.6); } - header + div { - margin-top: 10px; + .header-top a:hover { + text-decoration: none; + color: rgba(0,0,0,0.9); } + .header-top .col-lg-4 { + text-align: center; + padding-top: 4px; + padding-bottom: 4px; + } + .header-archivebox img { + display: inline-block; + margin-right: 3px; + height: 30px; + margin-left: 12px; + margin-top: -4px; + margin-bottom: 2px; + } + .header-archivebox img:hover { + opacity: 0.5; + } + #table-bookmarks_length, #table-bookmarks_filter { - padding: 0px 15px; + padding-top: 12px; + opacity: 0.8; + padding-left: 24px; + padding-right: 22px; + margin-bottom: -16px; } table { padding: 6px; @@ -98,6 +111,9 @@ overflow-y: scroll; table-layout: fixed; } + .dataTables_wrapper { + background-color: #fafafa; + } table tr a span[data-archived~=False] { opacity: 0.4; } @@ -131,7 +147,11 @@ border-radius: 4px; float:right } + input[type=search]::-webkit-search-cancel-button { + 
-webkit-appearance: searchfield-cancel-button; + } + @@ -151,21 +171,20 @@
diff --git a/archivebox/templates/link_index.html b/archivebox/templates/link_index.html
index 95aa6bb1..6309fb14 100644
--- a/archivebox/templates/link_index.html
+++ b/archivebox/templates/link_index.html
@@ -6,69 +6,72 @@
html, body {
width: 100%;
height: 100%;
- }
- body {
background-color: #ddd;
}
header {
- width: 100%;
- height: 90px;
background-color: #aa1e55;
+ padding-bottom: 12px;
+ }
+ small {
+ font-weight: 200;
+ }
+ .header-top {
+ width: 100%;
+ height: auto;
+ min-height: 40px;
margin: 0px;
text-align: center;
color: white;
- }
- header h1 {
- padding-top: 5px;
- padding-bottom: 5px;
- margin: 0px;
+ font-size: calc(11px + 0.86vw);
font-weight: 200;
- font-family: "Gill Sans", Helvetica, sans-serif;
- font-size: calc(16px + 1vw);
+ padding: 4px 4px;
+ background-color: #aa1e55;
}
- .collapse-icon {
- float: right;
- color: black;
- width: 126px;
- font-size: 0.8em;
- margin-top: 20px;
- margin-right: 0px;
- margin-left: -35px;
+ .nav > div {
+ min-height: 30px;
+ margin: 8px 0px;
}
- .nav-icon img {
- float: left;
- display: block;
- margin-right: 13px;
- color: black;
- height: 53px;
- margin-top: 12px;
- margin-left: 10px;
+ .header-top a {
+ text-decoration: none;
+ color: rgba(0,0,0,0.6);
}
- .nav-icon img:hover {
+ .header-top a:hover {
+ text-decoration: none;
+ color: rgba(0,0,0,0.9);
+ }
+ .header-top .col-lg-4 {
+ text-align: center;
+ padding-top: 4px;
+ padding-bottom: 4px;
+ }
+ .header-archivebox img {
+ display: inline-block;
+ margin-right: 3px;
+ height: 30px;
+ margin-left: 12px;
+ margin-top: -4px;
+ margin-bottom: 2px;
+ }
+ .header-archivebox img:hover {
opacity: 0.5;
}
- .title-url {
- color: black;
- display: block;
- width: 75%;
+ .header-url small {
white-space: nowrap;
- overflow: hidden;
- margin: auto;
+ font-weight: 200;
}
- .archive-page-header {
- margin-top: 5px;
+ .header-url img {
+ height: 20px;
+ vertical-align: -2px;
+ margin-right: 4px;
+ }
+
+ .info-row {
+ margin-top: 2px;
margin-bottom: 5px;
}
- .archive-page-header .alert {
+ .info-row .alert {
margin-bottom: 0px;
}
- h1 small {
- opacity: 0.4;
- font-size: 0.6em;
- }
- h1 small:hover {
- opacity: 0.8;
- }
.card {
overflow: hidden;
box-shadow: 2px 3px 14px 0px rgba(0,0,0,0.02);
@@ -87,18 +90,24 @@
max-height: 102px;
overflow: hidden;
}
+ .card-title {
+ margin-bottom: 4px;
+ }
.card-img-top {
border: 0px;
padding: 0px;
margin: 0px;
overflow: hidden;
opacity: 0.8;
- border-top: 1px solid gray;
- border-radius: 3px;
- border-bottom: 1px solid #ddd;
+ border-top: 1px solid rgba(0,0,0,0);
+ border-radius: 4px;
+ border-bottom: 1px solid rgba(0,0,0,0);
height: 430px;
- width: 400%;
+ width: 405%;
margin-bottom: -330px;
+ background-color: #333;
+ margin-left: -1%;
+ margin-right: -1%;
transform: scale(0.25);
transform-origin: 0 0;
@@ -116,8 +125,7 @@
box-shadow: 0px -6px 13px 1px rgba(0,0,0,0.05);
}
.iframe-large {
- height: 93%;
- margin-top: -10px;
+ height: calc(100% - 40px);
}
.pdf-frame {
transform: none;
@@ -125,6 +133,9 @@
height: 160px;
margin-top: -60px;
margin-bottom: 0px;
+ transform: scale(1.1);
+ width: 100%;
+ margin-left: -10%;
}
img.external {
height: 30px;
@@ -138,13 +149,61 @@
border: 4px solid green;
}
.screenshot {
+ background-color: #333;
transform: none;
width: 100%;
- height: auto;
+ min-height: 100px;
max-height: 100px;
margin-bottom: 0px;
object-fit: cover;
- object-position: top;
+ object-position: top center;
+ }
+ .header-bottom {
+ border-top: 1px solid rgba(170, 30, 85, 0.9); /* NOTE: reset by the `border` shorthand further down in this rule */
+ padding-bottom: 12px;
+ border-bottom: 5px solid rgb(170, 30, 85); /* NOTE: reset by the `border` shorthand further down in this rule */
+ margin-bottom: -1px;
+
+ border-radius: 4px;
+ background-color: rgba(23, 22, 22, 0.88);
+ width: 98%;
+ border: 1px solid rgba(0,0,0,0.2); /* shorthand sets all four sides, so it wins over border-top/border-bottom above. NOTE(review): if the accent borders are wanted, this must come before them - confirm intent */
+ box-shadow: 4px 4px 4px rgba(0,0,0,0.2);
+ margin-top: 5px;
+ }
+ .header-bottom-info {
+ color: #6f6f6f;
+ padding-top: 8px;
+ padding-bottom: 13px;
+ }
+
+ .header-bottom-info > div {
+ text-align: center;
+ }
+ .header-bottom-info h5 {
+ font-size: 1.1em;
+ font-weight: 200;
+ margin-top: 3px;
+ margin-bottom: 3px;
+ color: rgba(255, 255, 255, 0.74);
+ }
+ .info-chunk {
+ width: auto;
+ display:inline-block;
+ text-align: center;
+ margin: 10px 10px;
+ vertical-align: top;
+ }
+ .info-chunk .badge {
+ margin-top: 5px;
+ }
+ .header-bottom-frames .card-title {
+ padding-bottom: 0px;
+ font-size: 1.2vw;
+ margin-bottom: 5px;
+ }
+ .header-bottom-frames .card-text {
+ font-size: 0.9em;
}
@media(max-width: 1092px) {
@@ -164,131 +223,170 @@
.card {
margin-bottom: 5px;
}
- header > h1 > a.collapse-icon, header > h1 > a.nav-icon {
+ header > h1 > a.header-url, header > h1 > a.header-archivebox { /* NOTE(review): matches only links that are direct children of an h1 that is itself a direct child of header - confirm the markup still has that nesting */
display: none;
}
}
-
+
-
-
-
-
- ▾
-
- $title
- |