Improved tags

This commit is contained in:
Angel Rey 2020-09-21 11:50:26 -05:00
parent 0158efb1d0
commit 62c9028212
11 changed files with 172 additions and 10 deletions

View file

@ -4,6 +4,7 @@ mypy-extensions==0.4.3
base32-crockford==0.3.0 base32-crockford==0.3.0
django==3.0.8 django==3.0.8
django-extensions==3.0.3 django-extensions==3.0.3
django-taggit==1.3.0
dateparser dateparser
ipython ipython
youtube-dl youtube-dl

View file

@ -66,6 +66,12 @@ class SnapshotAdmin(admin.ModelAdmin):
actions = [delete_snapshots, overwrite_snapshots, update_snapshots, update_titles, verify_snapshots] actions = [delete_snapshots, overwrite_snapshots, update_snapshots, update_titles, verify_snapshots]
actions_template = 'admin/actions_as_select.html' actions_template = 'admin/actions_as_select.html'
def get_queryset(self, request):
    """Return the default admin queryset with each row's ``tags`` prefetched.

    The admin columns call ``obj.tags.all()`` once per row, so the tags are
    loaded up front via ``prefetch_related`` rather than per object.
    """
    base_queryset = super().get_queryset(request)
    return base_queryset.prefetch_related('tags')
def tag_list(self, obj):
    """Return the names of all tags on ``obj`` as one ", "-separated string."""
    tag_names = [tag.name for tag in obj.tags.all()]
    return ", ".join(tag_names)
def id_str(self, obj): def id_str(self, obj):
return format_html( return format_html(
'<code style="font-size: 10px">{}</code>', '<code style="font-size: 10px">{}</code>',
@ -75,9 +81,9 @@ class SnapshotAdmin(admin.ModelAdmin):
def title_str(self, obj): def title_str(self, obj):
canon = obj.as_link().canonical_outputs() canon = obj.as_link().canonical_outputs()
tags = ''.join( tags = ''.join(
format_html('<span>{}</span>', tag.strip()) format_html(' <span>{}</span> ', tag)
for tag in obj.tags.split(',') for tag in obj.tags.all()
) if obj.tags else '' ) if obj.tags.all() else ''
return format_html( return format_html(
'<a href="/{}">' '<a href="/{}">'
'<img src="/{}/{}" class="favicon" onerror="this.remove()">' '<img src="/{}/{}" class="favicon" onerror="this.remove()">'

View file

@ -0,0 +1,89 @@
# Generated by Django 3.0.8 on 2020-09-15 20:06
from django.db import migrations, models
from django.contrib.contenttypes.models import ContentType
from django.utils.text import slugify
import django.db.models.deletion
import taggit.managers
def _unique_tag_slug(tag_manager, name):
    """Return a slug for ``name`` that no existing Tag row already uses.

    Historical migration models do not carry custom ``save()`` logic, so slug
    collisions (two names that slugify identically, or names that slugify to
    an empty string) are resolved here by appending a numeric suffix.
    """
    base_slug = slugify(name)
    candidate = base_slug
    suffix = 0
    while tag_manager.filter(slug=candidate).exists():
        suffix += 1
        candidate = f"{base_slug}_{suffix}"
    return candidate


def forwards_func(apps, schema_editor):
    """Copy each snapshot's comma-separated ``tags_old`` text into taggit rows.

    For every non-empty, stripped tag name found in ``Snapshot.tags_old`` a
    ``taggit.Tag`` is looked up (or created) and linked to the snapshot through
    ``core.TaggedItem``. Blank or NULL ``tags_old`` values produce no tags.

    Args:
        apps: historical app registry supplied by ``migrations.RunPython``.
        schema_editor: schema editor whose connection alias selects the
            database that all queries are routed to.
    """
    db_alias = schema_editor.connection.alias

    # Only historical models are used, so the data migration matches the
    # schema as it exists at this point in the migration graph.
    snapshots = apps.get_model("core", "Snapshot").objects.using(db_alias)
    tagged_items = apps.get_model("core", "TaggedItem").objects.using(db_alias)
    tags = apps.get_model("taggit", "Tag").objects.using(db_alias)
    content_types = apps.get_model("contenttypes", "ContentType").objects.using(db_alias)

    # The content type row may not exist yet (content types are normally
    # created after migrations finish), so create it when it is missing
    # instead of indexing into a possibly empty queryset.
    snapshot_type, _ = content_types.get_or_create(app_label="core", model="snapshot")

    for snapshot in snapshots.all():
        # ''.split(',') yields [''], so empty names must be filtered out or
        # every untagged snapshot would receive an empty-named tag.
        tag_names = {
            name.strip()
            for name in (snapshot.tags_old or '').split(',')
            if name.strip()
        }
        for name in sorted(tag_names):
            tag = tags.filter(name=name).first()
            if tag is None:
                tag = tags.create(name=name, slug=_unique_tag_slug(tags, name))
            tagged_items.get_or_create(
                content_type_id=snapshot_type.id,
                object_id=snapshot.id,
                tag=tag,
            )
def reverse_func(apps, schema_editor):
    """Undo the tag data migration: move taggit tags back into ``tags_old``.

    When the migration is unapplied, Django reverses the operations in the
    opposite order, so the removal of ``tags_old`` has already been undone by
    the time this runs. Each snapshot's tag names are written back to
    ``tags_old`` as a sorted, comma-separated string and the snapshot's
    ``core.TaggedItem`` rows are then deleted.

    Args:
        apps: historical app registry supplied by ``migrations.RunPython``.
        schema_editor: schema editor whose connection alias selects the
            database that all queries are routed to.
    """
    db_alias = schema_editor.connection.alias
    snapshots = apps.get_model("core", "Snapshot").objects.using(db_alias)
    tagged_items = apps.get_model("core", "TaggedItem").objects.using(db_alias)

    for snapshot in snapshots.all():
        # Restrict to rows that belong to Snapshot objects so only the links
        # created by the forward data migration are touched.
        snapshot_items = tagged_items.filter(
            content_type__app_label="core",
            content_type__model="snapshot",
            object_id=snapshot.id,
        )
        tag_names = sorted({item.tag.name for item in snapshot_items})
        if tag_names:
            # Use a queryset update so no other snapshot column is rewritten.
            snapshots.filter(id=snapshot.id).update(tags_old=','.join(tag_names))
        snapshot_items.delete()
class Migration(migrations.Migration):
    """Replace the text ``Snapshot.tags`` column with django-taggit tags.

    The old column is renamed to ``tags_old``, the ``TaggedItem`` through
    model and the new ``tags`` manager are added, the data is copied across
    by ``forwards_func`` / ``reverse_func``, and ``tags_old`` is dropped last.
    """

    dependencies = [
        ('contenttypes', '0002_remove_content_type_name'),
        ('taggit', '0003_taggeditem_add_unique_index'),
        ('core', '0005_auto_20200728_0326'),
    ]

    operations = [
        # Keep the legacy text column under a temporary name so the data
        # migration below can still read it.
        migrations.RenameField(
            model_name='snapshot',
            old_name='tags',
            new_name='tags_old',
        ),
        # Through model joining tags to snapshots (UUID object ids).
        migrations.CreateModel(
            name='TaggedItem',
            fields=[
                (
                    'id',
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name='ID',
                    ),
                ),
                (
                    'object_id',
                    models.UUIDField(db_index=True, verbose_name='object ID'),
                ),
                (
                    'content_type',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='core_taggeditem_tagged_items',
                        to='contenttypes.ContentType',
                        verbose_name='content type',
                    ),
                ),
                (
                    'tag',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='core_taggeditem_items',
                        to='taggit.Tag',
                    ),
                ),
            ],
            options={
                'verbose_name': 'Tag',
                'verbose_name_plural': 'Tags',
            },
        ),
        # New taggit-backed manager that takes over the ``tags`` name.
        migrations.AddField(
            model_name='snapshot',
            name='tags',
            field=taggit.managers.TaggableManager(
                help_text='A comma-separated list of tags.',
                through='core.TaggedItem',
                to='taggit.Tag',
                verbose_name='Tags',
            ),
        ),
        # Copy the data between the two representations.
        migrations.RunPython(forwards_func, reverse_func),
        # The legacy column is dropped only after its data has been copied.
        migrations.RemoveField(
            model_name='snapshot',
            name='tags_old',
        ),
    ]

View file

@ -5,10 +5,19 @@ import uuid
from django.db import models from django.db import models
from django.utils.functional import cached_property from django.utils.functional import cached_property
from taggit.managers import TaggableManager
from taggit.models import GenericUUIDTaggedItemBase, TaggedItemBase
from ..util import parse_date from ..util import parse_date
from ..index.schema import Link from ..index.schema import Link
class TaggedItem(GenericUUIDTaggedItemBase, TaggedItemBase):
    """Through model used by ``Snapshot.tags`` to attach taggit tags.

    Built on taggit's UUID-based generic base because ``Snapshot`` uses a
    UUID primary key.
    """

    class Meta:
        # Human-readable names for this model.
        verbose_name_plural = "Tags"
        verbose_name = "Tag"
class Snapshot(models.Model): class Snapshot(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
@ -16,7 +25,7 @@ class Snapshot(models.Model):
timestamp = models.CharField(max_length=32, unique=True, db_index=True) timestamp = models.CharField(max_length=32, unique=True, db_index=True)
title = models.CharField(max_length=128, null=True, blank=True, db_index=True) title = models.CharField(max_length=128, null=True, blank=True, db_index=True)
tags = models.CharField(max_length=256, null=True, blank=True, db_index=True) tags = TaggableManager(through=TaggedItem)
added = models.DateTimeField(auto_now_add=True, db_index=True) added = models.DateTimeField(auto_now_add=True, db_index=True)
updated = models.DateTimeField(null=True, blank=True, db_index=True) updated = models.DateTimeField(null=True, blank=True, db_index=True)

View file

@ -31,6 +31,7 @@ INSTALLED_APPS = [
'core', 'core',
'django_extensions', 'django_extensions',
'taggit',
] ]

View file

@ -86,9 +86,16 @@ def merge_links(a: Link, b: Link) -> Link:
) )
# all unique, truthy tags # all unique, truthy tags
tags_a = []
if a.tags:
tags_a = a.tags.all()
tags_b = []
if b.tags:
tags_b = b.tags.all()
tags_set = ( tags_set = (
set(tag.strip() for tag in (a.tags or '').split(',')) set(tag.name.strip() for tag in tags_a)
| set(tag.strip() for tag in (b.tags or '').split(',')) | set(tag.name.strip() for tag in tags_b)
) )
tags = ','.join(tags_set) or None tags = ','.join(tags_set) or None

View file

@ -157,7 +157,8 @@ class Link:
assert isinstance(self.url, str) and '://' in self.url assert isinstance(self.url, str) and '://' in self.url
assert self.updated is None or isinstance(self.updated, datetime) assert self.updated is None or isinstance(self.updated, datetime)
assert self.title is None or (isinstance(self.title, str) and self.title) assert self.title is None or (isinstance(self.title, str) and self.title)
assert self.tags is None or isinstance(self.tags, str) #for tag in self.tags.all():
# assert tag is None or isinstance(tag, TaggedItem)
assert isinstance(self.sources, list) assert isinstance(self.sources, list)
assert all(isinstance(source, str) and source for source in self.sources) assert all(isinstance(source, str) and source for source in self.sources)
assert isinstance(self.history, dict) assert isinstance(self.history, dict)

View file

@ -65,7 +65,14 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
except Snapshot.DoesNotExist: except Snapshot.DoesNotExist:
snap = write_link_to_sql_index(link) snap = write_link_to_sql_index(link)
snap.title = link.title snap.title = link.title
snap.tags = link.tags
tag_set = (
set(tag.strip() for tag in (link.tags or '').split(','))
)
tag_list = list(tag_set) or []
for tag in tag_list:
snap.tags.add(tag)
snap.save() snap.save()

View file

@ -80,6 +80,7 @@ setuptools.setup(
"base32-crockford==0.3.0", "base32-crockford==0.3.0",
"django==3.0.8", "django==3.0.8",
"django-extensions==3.0.3", "django-extensions==3.0.3",
"django-taggit==1.3.0",
"dateparser", "dateparser",
"ipython", "ipython",

Binary file not shown.

View file

@ -4,7 +4,7 @@
import os import os
import subprocess import subprocess
from pathlib import Path from pathlib import Path
import json import json, shutil
import sqlite3 import sqlite3
from archivebox.config import OUTPUT_PERMISSIONS from archivebox.config import OUTPUT_PERMISSIONS
@ -132,3 +132,43 @@ def test_unrecognized_folders(tmp_path, process, disable_extractors_dict):
init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict) init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
assert "Skipped adding 1 invalid link data directories" in init_process.stdout.decode("utf-8") assert "Skipped adding 1 invalid link data directories" in init_process.stdout.decode("utf-8")
assert init_process.returncode == 0 assert init_process.returncode == 0
def test_tags_migration(tmp_path, disable_extractors_dict):
    """Check that ``archivebox init`` migrates text tags into taggit rows.

    A pre-migration sqlite index is copied into ``tmp_path``, the old
    comma-separated ``tags`` values are recorded, ``archivebox init`` is run
    to apply the migrations, and the resulting tag rows are compared with the
    recorded values in both directions.
    """
    base_sqlite_path = Path(__file__).parent / 'tags_migration'

    # Start from a clean copy of the fixture directory.
    if os.path.exists(tmp_path):
        shutil.rmtree(tmp_path)
    shutil.copytree(str(base_sqlite_path), tmp_path)
    os.chdir(tmp_path)

    # Record the tags of every snapshot before the migration runs.
    conn = sqlite3.connect("index.sqlite3")
    try:
        conn.row_factory = sqlite3.Row
        snapshots = conn.execute("SELECT id, tags from core_snapshot").fetchall()
    finally:
        conn.close()

    # Compare whole tag names rather than substrings of the raw text; NULL is
    # treated like an empty string.
    old_tags = {
        sn['id']: {name.strip() for name in (sn['tags'] or '').split(',')}
        for sn in snapshots
    }

    init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
    assert init_process.returncode == 0, init_process.stderr.decode("utf-8")

    conn = sqlite3.connect("index.sqlite3")
    try:
        conn.row_factory = sqlite3.Row
        tags = conn.execute("""
        SELECT snapshot.id snapshot, tags.name tag
        FROM core_snapshot snapshot, core_taggeditem snapshot_tagged, taggit_tag tags
        WHERE
        snapshot.id = snapshot_tagged.object_id
        AND tags.id = snapshot_tagged.tag_id
        """).fetchall()
    finally:
        conn.close()

    migrated_tags = {}
    for tag in tags:
        migrated_tags.setdefault(tag['snapshot'], set()).add(tag['tag'])

    # Check each tag migrated is in the previous field
    for snapshot_id, tag_names in migrated_tags.items():
        assert tag_names <= old_tags[snapshot_id]

    # Check each non-empty tag in the previous field was migrated
    for snapshot_id, tag_names in old_tags.items():
        assert tag_names - {''} <= migrated_tags.get(snapshot_id, set())