diff --git a/archivebox/archive_methods.py b/archivebox/archive_methods.py index 3bfc15a7..db48f41c 100644 --- a/archivebox/archive_methods.py +++ b/archivebox/archive_methods.py @@ -40,6 +40,7 @@ from config import ( CHROME_VERSION, GIT_VERSION, YOUTUBEDL_VERSION, + ONLY_NEW, ) from util import ( enforce_types, @@ -87,33 +88,40 @@ def archive_link(link: Link, page=None) -> Link: link = load_json_link_index(link.link_dir, link) log_link_archiving_started(link.link_dir, link, is_new) + link = link.overwrite(updated=datetime.now()) stats = {'skipped': 0, 'succeeded': 0, 'failed': 0} for method_name, should_run, method_function in ARCHIVE_METHODS: - if method_name not in link.history: - link.history[method_name] = [] - - if should_run(link.link_dir, link): - log_archive_method_started(method_name) + try: + if method_name not in link.history: + link.history[method_name] = [] + + if should_run(link.link_dir, link): + log_archive_method_started(method_name) - result = method_function(link.link_dir, link) + result = method_function(link.link_dir, link) - link.history[method_name].append(result) + link.history[method_name].append(result) - stats[result.status] += 1 - log_archive_method_finished(result) - else: - stats['skipped'] += 1 + stats[result.status] += 1 + log_archive_method_finished(result) + else: + stats['skipped'] += 1 + except Exception as e: + raise Exception('Exception in archive_methods.fetch_{}(Link(url={}))'.format( + method_name, + link.url, + )) from e # print(' ', stats) - link = Link(**{ - **link._asdict(), - 'updated': datetime.now(), - }) - + # If any changes were made, update the link index json and html write_link_index(link.link_dir, link) - patch_links_index(link) + + was_changed = stats['succeeded'] or stats['failed'] + if was_changed: + patch_links_index(link) + log_link_archiving_finished(link.link_dir, link, is_new, stats) except KeyboardInterrupt: diff --git a/archivebox/index.py b/archivebox/index.py index 2bf2b5eb..74e7dd42 100644 --- a/archivebox/index.py +++ b/archivebox/index.py @@ -154,7 +154,7 @@ def write_html_links_index(out_dir: str, links: List[Link], finished: bool=False link.title or (link.base_url if link.is_archived else TITLE_LOADING_MSG) ), - 'tags': link.tags or '', + 'tags': (link.tags or '') + (' {}'.format(link.extension) if link.is_static else ''), 'favicon_url': ( os.path.join('archive', link.timestamp, 'favicon.ico') # if link['is_archived'] else 'data:image/gif;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs=' @@ -196,12 +196,11 @@ def patch_links_index(link: Link, out_dir: str=OUTPUT_DIR) -> None: patched_links = [] for saved_link in json_file_links: if saved_link.url == link.url: - patched_links.append(Link(**{ - **saved_link._asdict(), - 'title': title, - 'history': link.history, - 'updated': link.updated, - })) + patched_links.append(saved_link.overwrite( + title=title, + history=link.history, + updated=link.updated, + )) else: patched_links.append(saved_link) @@ -283,7 +282,7 @@ def write_html_link_index(out_dir: str, link: Link) -> None: ), 'extension': link.extension or 'html', 'tags': link.tags or 'untagged', - 'status': 'Archived' if link.is_archived else 'Not yet archived', + 'status': 'archived' if link.is_archived else 'not yet archived', 'status_color': 'success' if link.is_archived else 'danger', })) diff --git a/archivebox/logs.py b/archivebox/logs.py index fd1f0bc5..b2913c18 100644 --- a/archivebox/logs.py +++ b/archivebox/logs.py @@ -131,7 +131,7 @@ def log_link_archiving_started(link_dir: str, link: Link, is_new: bool): print('\n[{symbol_color}{symbol}{reset}] [{symbol_color}{now}{reset}] "{title}"'.format( symbol_color=ANSI['green' if is_new else 'black'], - symbol='+' if is_new else '*', + symbol='+' if is_new else '√', now=datetime.now().strftime('%Y-%m-%d %H:%M:%S'), title=link.title or link.base_url, **ANSI, diff --git a/archivebox/schema.py b/archivebox/schema.py index fa110653..434f9dc5 100644 --- a/archivebox/schema.py +++ b/archivebox/schema.py @@ -59,6 +59,10 @@ class Link: object.__setattr__(self, 'history', cast_history) + def overwrite(self, **kwargs): + """pure functional version of dict.update that returns a new instance""" + return Link(**{**self._asdict(), **kwargs}) + def __eq__(self, other): if not isinstance(other, Link): return NotImplemented @@ -96,6 +100,9 @@ class Link: 'is_static': self.is_static, 'is_archived': self.is_archived, 'num_outputs': self.num_outputs, + 'num_failures': self.num_failures, + 'oldest_archive_date': self.oldest_archive_date, + 'newest_archive_date': self.newest_archive_date, }) return info @@ -152,11 +159,42 @@ class Link: from util import ts_to_date return ts_to_date(self.updated) if self.updated else None + @property + def oldest_archive_date(self) -> Optional[datetime]: + from util import ts_to_date + + most_recent = min( + (result.start_ts + for method in self.history.keys() + for result in self.history[method]), + default=None, + ) + return ts_to_date(most_recent) if most_recent else None + + @property + def newest_archive_date(self) -> Optional[datetime]: + from util import ts_to_date + + most_recent = max( + (result.start_ts + for method in self.history.keys() + for result in self.history[method]), + default=None, + ) + return ts_to_date(most_recent) if most_recent else None + ### Archive Status Helpers @property def num_outputs(self) -> int: return len(tuple(filter(None, self.latest_outputs().values()))) + @property + def num_failures(self) -> int: + return sum(1 + for method in self.history.keys() + for result in self.history[method] + if result.status == 'failed') + @property def is_static(self) -> bool: from util import is_static_file diff --git a/archivebox/templates/index.html b/archivebox/templates/index.html index 264deb4d..f5cf2785 100644 --- a/archivebox/templates/index.html +++ b/archivebox/templates/index.html @@ -13,51 +13,64 @@ padding: 0px; font-family: "Gill Sans", Helvetica, sans-serif; } - header { - background-color: #aa1e55; - color: #1a1a1a; - padding: 10px; - padding-top: 0px; - padding-bottom: 15px; - /*height: 40px;*/ + .header-top small { + font-weight: 200; + color: #efefef; } - header h1 { - margin: 7px 0px; - font-size: 35px; - font-weight: 300; - color: #1a1a1a; - } - header h1 img { - height: 44px; - vertical-align: bottom; - } - header a { - text-decoration: none !important; - color: #1a1a1a; - } - .header-center { - margin: auto; - float: none; + + .header-top { + width: 100%; + height: auto; + min-height: 40px; + margin: 0px; text-align: center; - padding-top: 6px; + color: white; + font-size: calc(11px + 0.86vw); + font-weight: 200; + padding: 4px 4px; + border-bottom: 3px solid #aa1e55; + background-color: #aa1e55; } - .header-center small { - color: #eaeaea; - opacity: 0.7; + input[type=search] { + width: 22vw; + border-radius: 4px; + border: 1px solid #aeaeae; + padding: 3px 5px; } - .header-left { - float: left; + .nav > div { + min-height: 30px; } - .header-right { - float: right; - padding-top: 17px; - padding-right: 10px; + .header-top a { + text-decoration: none; + color: rgba(0,0,0,0.6); } - header + div { - margin-top: 10px; + .header-top a:hover { + text-decoration: none; + color: rgba(0,0,0,0.9); } + .header-top .col-lg-4 { + text-align: center; + padding-top: 4px; + padding-bottom: 4px; + } + .header-archivebox img { + display: inline-block; + margin-right: 3px; + height: 30px; + margin-left: 12px; + margin-top: -4px; + margin-bottom: 2px; + } + .header-archivebox img:hover { + opacity: 0.5; + } + #table-bookmarks_length, #table-bookmarks_filter { - padding: 0px 15px; + padding-top: 12px; + opacity: 0.8; + padding-left: 24px; + padding-right: 22px; + margin-bottom: -16px; } table { padding: 6px; @@ -98,6 +111,9 @@ overflow-y: scroll; table-layout: fixed; } + .dataTables_wrapper { + background-color: #fafafa; + } table tr a span[data-archived~=False] { opacity: 0.4; } @@ -131,7 +147,11 @@ border-radius: 4px; float:right } + input[type=search]::-webkit-search-cancel-button { + -webkit-appearance: searchfield-cancel-button; + } + @@ -151,21 +171,20 @@
-
- Documentation   |   - Source   |   - Website -
-
- -

ArchiveBox: Index

-
-
-
- Archived Sites - -
- Last updated $time_updated +
+
diff --git a/archivebox/templates/index_row.html b/archivebox/templates/index_row.html index 766f8038..41c6e1ea 100644 --- a/archivebox/templates/index_row.html +++ b/archivebox/templates/index_row.html @@ -4,7 +4,7 @@ $title - $tags + $tags
diff --git a/archivebox/templates/link_index.html b/archivebox/templates/link_index.html index 95aa6bb1..6309fb14 100644 --- a/archivebox/templates/link_index.html +++ b/archivebox/templates/link_index.html @@ -6,69 +6,72 @@ html, body { width: 100%; height: 100%; - } - body { background-color: #ddd; } header { - width: 100%; - height: 90px; background-color: #aa1e55; + padding-bottom: 12px; + } + small { + font-weight: 200; + } + .header-top { + width: 100%; + height: auto; + min-height: 40px; margin: 0px; text-align: center; color: white; - } - header h1 { - padding-top: 5px; - padding-bottom: 5px; - margin: 0px; + font-size: calc(11px + 0.86vw); font-weight: 200; - font-family: "Gill Sans", Helvetica, sans-serif; - font-size: calc(16px + 1vw); + padding: 4px 4px; + background-color: #aa1e55; } - .collapse-icon { - float: right; - color: black; - width: 126px; - font-size: 0.8em; - margin-top: 20px; - margin-right: 0px; - margin-left: -35px; + .nav > div { + min-height: 30px; + margin: 8px 0px; } - .nav-icon img { - float: left; - display: block; - margin-right: 13px; - color: black; - height: 53px; - margin-top: 12px; - margin-left: 10px; + .header-top a { + text-decoration: none; + color: rgba(0,0,0,0.6); } - .nav-icon img:hover { + .header-top a:hover { + text-decoration: none; + color: rgba(0,0,0,0.9); + } + .header-top .col-lg-4 { + text-align: center; + padding-top: 4px; + padding-bottom: 4px; + } + .header-archivebox img { + display: inline-block; + margin-right: 3px; + height: 30px; + margin-left: 12px; + margin-top: -4px; + margin-bottom: 2px; + } + .header-archivebox img:hover { opacity: 0.5; } - .title-url { - color: black; - display: block; - width: 75%; + .header-url small { white-space: nowrap; - overflow: hidden; - margin: auto; + font-weight: 200; } - .archive-page-header { - margin-top: 5px; + .header-url img { + height: 20px; + vertical-align: -2px; + margin-right: 4px; + } + + .info-row { + margin-top: 2px; margin-bottom: 5px; } - .archive-page-header .alert { + .info-row .alert { margin-bottom: 0px; } - h1 small { - opacity: 0.4; - font-size: 0.6em; - } - h1 small:hover { - opacity: 0.8; - } .card { overflow: hidden; box-shadow: 2px 3px 14px 0px rgba(0,0,0,0.02); @@ -87,18 +90,24 @@ max-height: 102px; overflow: hidden; } + .card-title { + margin-bottom: 4px; + } .card-img-top { border: 0px; padding: 0px; margin: 0px; overflow: hidden; opacity: 0.8; - border-top: 1px solid gray; - border-radius: 3px; - border-bottom: 1px solid #ddd; + border-top: 1px solid rgba(0,0,0,0); + border-radius: 4px; + border-bottom: 1px solid rgba(0,0,0,0); height: 430px; - width: 400%; + width: 405%; margin-bottom: -330px; + background-color: #333; + margin-left: -1%; + margin-right: -1%; transform: scale(0.25); transform-origin: 0 0; @@ -116,8 +125,7 @@ box-shadow: 0px -6px 13px 1px rgba(0,0,0,0.05); } .iframe-large { - height: 93%; - margin-top: -10px; + height: calc(100% - 40px); } .pdf-frame { transform: none; @@ -125,6 +133,9 @@ height: 160px; margin-top: -60px; margin-bottom: 0px; + transform: scale(1.1); + width: 100%; + margin-left: -10%; } img.external { height: 30px; @@ -138,13 +149,61 @@ border: 4px solid green; } .screenshot { + background-color: #333; transform: none; width: 100%; - height: auto; + min-height: 100px; max-height: 100px; margin-bottom: 0px; object-fit: cover; - object-position: top; + object-position: top center; + } + .header-bottom { + border-top: 1px solid rgba(170, 30, 85, 0.9); + padding-bottom: 12px; + border-bottom: 5px solid rgb(170, 30, 85); + margin-bottom: -1px; + + border-radius: 4px; + background-color: rgba(23, 22, 22, 0.88); + width: 98%; + border: 1px solid rgba(0,0,0,0.2); + box-shadow: 4px 4px 4px rgba(0,0,0,0.2); + margin-top: 5px; + } + .header-bottom-info { + color: #6f6f6f; + padding-top: 8px; + padding-bottom: 13px; + } + + .header-bottom-info > div { + text-align: center; + } + .header-bottom-info h5 { + font-size: 1.1em; + font-weight: 200; + margin-top: 3px; + margin-bottom: 3px; + color: rgba(255, 255, 255, 0.74); + } + .info-chunk { + width: auto; + display:inline-block; + text-align: center; + margin: 10px 10px; + vertical-align: top; + } + .info-chunk .badge { + margin-top: 5px; + } + .header-bottom-frames .card-title { + padding-bottom: 0px; + font-size: 1.2vw; + margin-bottom: 5px; + } + .header-bottom-frames .card-text { + font-size: 0.9em; } @media(max-width: 1092px) { @@ -164,131 +223,170 @@ .card { margin-bottom: 5px; } - header > h1 > a.collapse-icon, header > h1 > a.nav-icon { + header > h1 > a.header-url, header > h1 > a.header-archivebox { display: none; } } - +
-

- - Archive Icon - - - ▾ - - $title
- - $base_url - -

-
-