From f3a3d76439ccfb822197d086c94520686756aa5a Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Sat, 27 Mar 2021 04:30:15 -0400 Subject: [PATCH] support adding urls with tags directly via CLI and add page --- archivebox/core/forms.py | 1 + archivebox/core/models.py | 6 +++++- archivebox/core/views.py | 2 ++ archivebox/main.py | 26 +++++++++++++++----------- archivebox/templates/core/add.html | 2 +- archivebox/templates/static/add.css | 2 +- 6 files changed, 25 insertions(+), 14 deletions(-) diff --git a/archivebox/core/forms.py b/archivebox/core/forms.py index 5521ff1d..e3e904df 100644 --- a/archivebox/core/forms.py +++ b/archivebox/core/forms.py @@ -20,6 +20,7 @@ ARCHIVE_METHODS = [ class AddLinkForm(forms.Form): url = forms.RegexField(label="URLs (one per line)", regex=URL_REGEX, min_length='6', strip=True, widget=forms.Textarea, required=True) + tag = forms.CharField(label="Tags (comma separated tag1,tag2,tag3)", strip=True, required=False) depth = forms.ChoiceField(label="Archive depth", choices=CHOICES, initial='0', widget=forms.RadioSelect(attrs={"class": "depth-selection"})) archive_methods = forms.MultipleChoiceField( label="Archive methods (select at least 1, otherwise all will be used by default)", diff --git a/archivebox/core/models.py b/archivebox/core/models.py index d3495826..e7741920 100644 --- a/archivebox/core/models.py +++ b/archivebox/core/models.py @@ -116,9 +116,13 @@ class Snapshot(models.Model): from ..index import load_link_details return load_link_details(self.as_link()) - def tags_str(self) -> str: + def tags_str(self, nocache=True) -> str: cache_key = f'{self.id}-{(self.updated or self.added).timestamp()}-tags' calc_tags_str = lambda: ','.join(self.tags.order_by('name').values_list('name', flat=True)) + if nocache: + tags_str = calc_tags_str() + cache.set(cache_key, tags_str) + return tags_str return cache.get_or_set(cache_key, calc_tags_str) @cached_property diff --git a/archivebox/core/views.py b/archivebox/core/views.py index 801676b3..36794a8d 100644 --- a/archivebox/core/views.py +++ b/archivebox/core/views.py @@ -267,10 +267,12 @@ class AddView(UserPassesTestMixin, FormView): def form_valid(self, form): url = form.cleaned_data["url"] print(f'[+] Adding URL: {url}') + tag = form.cleaned_data["tag"] depth = 0 if form.cleaned_data["depth"] == "0" else 1 extractors = ','.join(form.cleaned_data["archive_methods"]) input_kwargs = { "urls": url, + "tag": tag, "depth": depth, "update_all": False, "out_dir": OUTPUT_DIR, diff --git a/archivebox/main.py b/archivebox/main.py index 3b52a179..884f5b46 100644 --- a/archivebox/main.py +++ b/archivebox/main.py @@ -611,17 +611,6 @@ def add(urls: Union[str, List[str]], write_main_index(links=new_links, out_dir=out_dir) all_links = load_main_index(out_dir=out_dir) - # add any tags to imported links - tags = [ - Tag.objects.get_or_create(name=name.strip()) - for name in tag.split(',') - if name.strip() - ] - if tags: - for link in imported_links: - link.as_snapshot().tags.add(*tags) - - if index_only: # mock archive all the links using the fake index_only extractor method in order to update their state if overwrite: @@ -644,6 +633,21 @@ def add(urls: Union[str, List[str]], archive_links(new_links, overwrite=False, **archive_kwargs) + # add any tags to imported links + tags = [ + Tag.objects.get_or_create(name=name.strip())[0] + for name in tag.split(',') + if name.strip() + ] + if tags: + for link in imported_links: + snapshot = link.as_snapshot() + snapshot.tags.add(*tags) + tags_str = snapshot.tags_str(nocache=True) + snapshot.save() + # print(f' √ Tagged {len(imported_links)} Snapshots with {len(tags)} tags {tags_str}') + + return all_links @enforce_types diff --git a/archivebox/templates/core/add.html b/archivebox/templates/core/add.html index 0f161885..8123ef7a 100644 --- a/archivebox/templates/core/add.html +++ b/archivebox/templates/core/add.html @@ -15,7 +15,7 @@ {% endblock %} {% block body %} -
+


{% if stdout %}

Add new URLs to your archive: results

diff --git a/archivebox/templates/static/add.css b/archivebox/templates/static/add.css index 875c61bc..21645e70 100644 --- a/archivebox/templates/static/add.css +++ b/archivebox/templates/static/add.css @@ -42,7 +42,7 @@ header { background-color: #f5dd5d; } #stdout { - background-color: #ded; + background-color: #fbfbfb; padding: 10px 10px; border-radius: 4px; white-space: normal;