add tag cli option

Nick Sweeting 2021-03-27 03:57:05 -04:00
parent 334061f17e
commit fea0b89dbe
3 changed files with 39 additions and 13 deletions

View file

@@ -22,6 +22,12 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         add_help=True,
         formatter_class=SmartFormatter,
     )
+    parser.add_argument(
+        '--tag', '-t',
+        type=str,
+        default='',
+        help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
+    )
     parser.add_argument(
         '--update-all', #'-n',
         action='store_true',
@@ -89,6 +95,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
     add(
         urls=stdin_urls or urls,
         depth=command.depth,
+        tag=command.tag,
         update_all=command.update_all,
         index_only=command.index_only,
         overwrite=command.overwrite,
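
For context, a minimal standalone argparse sketch of how the new --tag flag parses before being handed to add(tag=command.tag). This is not ArchiveBox code: the throwaway parser, the 'urls' positional, and the sample arguments are assumptions for illustration only.

# Standalone sketch only: mirrors the new '--tag' option on a throwaway parser.
import argparse

parser = argparse.ArgumentParser(add_help=True)
parser.add_argument(
    '--tag', '-t',
    type=str,
    default='',
    help="Tag the added URLs with the provided tags e.g. --tag=tag1,tag2,tag3",
)
parser.add_argument('urls', nargs='*')  # assumption: the real CLI defines many more options

command = parser.parse_args(['--tag=tag1,tag2,tag3', 'https://example.com'])
assert command.tag == 'tag1,tag2,tag3'                    # passed through as add(tag=command.tag)
assert command.tag.split(',') == ['tag1', 'tag2', 'tag3']  # split on commas later, inside add()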

View file

@@ -33,8 +33,11 @@ class Tag(models.Model):
     Based on django-taggit model
     """
     name = models.CharField(unique=True, blank=False, max_length=100)
+
+    # slug is autoset on save from name, never set it manually
     slug = models.SlugField(unique=True, blank=True, max_length=100)
+
     class Meta:
         verbose_name = "Tag"
         verbose_name_plural = "Tags"
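
The new comment documents that slug is derived from name when the model is saved; that save() logic itself is not part of this diff. As a hedged illustration only (a common Django pattern, assumed rather than taken from the repo, and requiring a configured Django project to actually run):

# Illustration only (assumed, not from this commit): autoset slug from name on save.
from django.db import models
from django.utils.text import slugify

class Tag(models.Model):
    name = models.CharField(unique=True, blank=False, max_length=100)
    # slug is autoset on save from name, never set it manually
    slug = models.SlugField(unique=True, blank=True, max_length=100)

    class Meta:
        verbose_name = "Tag"
        verbose_name_plural = "Tags"

    def save(self, *args, **kwargs):
        if not self.slug:
            self.slug = slugify(self.name)  # e.g. 'Read Later' -> 'read-later'
        super().save(*args, **kwargs)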

View file

@@ -561,6 +561,7 @@ def oneshot(url: str, extractors: str="", out_dir: Path=OUTPUT_DIR):
 
 @enforce_types
 def add(urls: Union[str, List[str]],
+        tag: str='',
         depth: int=0,
         update_all: bool=not ONLY_NEW,
         index_only: bool=False,
@@ -570,6 +571,8 @@ def add(urls: Union[str, List[str]],
         out_dir: Path=OUTPUT_DIR) -> List[Link]:
     """Add a new URL or list of URLs to your archive"""
 
+    from core.models import Tag
+
     assert depth in (0, 1), 'Depth must be 0 or 1 (depth >1 is not supported yet)'
     extractors = extractors.split(",") if extractors else []
@@ -602,31 +605,44 @@
             new_links_depth += parse_links_from_source(downloaded_file, root_url=new_link.url)
 
     imported_links = list({link.url: link for link in (new_links + new_links_depth)}.values())
     new_links = dedupe_links(all_links, imported_links)
 
     write_main_index(links=new_links, out_dir=out_dir)
     all_links = load_main_index(out_dir=out_dir)
+
+    # add any tags to imported links
+    tags = [
+        Tag.objects.get_or_create(name=name.strip())
+        for name in tag.split(',')
+        if name.strip()
+    ]
+    if tags:
+        for link in imported_links:
+            link.as_snapshot().tags.add(*tags)
+
     if index_only:
+        # mock archive all the links using the fake index_only extractor method in order to update their state
         if overwrite:
             archive_links(imported_links, overwrite=overwrite, methods=['index_only'], out_dir=out_dir)
         else:
             archive_links(new_links, overwrite=False, methods=['index_only'], out_dir=out_dir)
-        return all_links
-    # Run the archive methods for each link
-    archive_kwargs = {
-        "out_dir": out_dir,
-    }
-    if extractors:
-        archive_kwargs["methods"] = extractors
-    if update_all:
-        archive_links(all_links, overwrite=overwrite, **archive_kwargs)
-    elif overwrite:
-        archive_links(imported_links, overwrite=True, **archive_kwargs)
-    elif new_links:
-        archive_links(new_links, overwrite=False, **archive_kwargs)
+    else:
+        # fully run the archive extractor methods for each link
+        archive_kwargs = {
+            "out_dir": out_dir,
+        }
+        if extractors:
+            archive_kwargs["methods"] = extractors
+
+        if update_all:
+            archive_links(all_links, overwrite=overwrite, **archive_kwargs)
+        elif overwrite:
+            archive_links(imported_links, overwrite=True, **archive_kwargs)
+        elif new_links:
+            archive_links(new_links, overwrite=False, **archive_kwargs)
 
     return all_links
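
A hedged usage sketch of the updated add() signature (the import path and values below are assumptions, not part of the diff): tag names are split on commas, stripped, looked up or created as Tag rows, and attached to every imported snapshot. Note that Django's Tag.objects.get_or_create() returns an (object, created) tuple rather than a bare Tag instance, so the tags list built in this hunk holds tuples; whether that is intended cannot be told from this diff alone.

# Hypothetical call site, not part of the commit:
from archivebox.main import add   # assumed import path

add(
    urls=['https://example.com'],
    tag='tag1, tag2,tag3',   # whitespace around each name is removed by name.strip()
    depth=0,                 # only 0 or 1 is accepted (asserted above)
    index_only=True,         # write the index only; extractors are skipped in this branch
)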