mirror of
https://github.com/ArchiveBox/ArchiveBox
synced 2024-11-21 19:53:06 +00:00
Improved tags
This commit is contained in:
parent
0158efb1d0
commit
62c9028212
11 changed files with 172 additions and 10 deletions
|
@ -4,6 +4,7 @@ mypy-extensions==0.4.3
|
||||||
base32-crockford==0.3.0
|
base32-crockford==0.3.0
|
||||||
django==3.0.8
|
django==3.0.8
|
||||||
django-extensions==3.0.3
|
django-extensions==3.0.3
|
||||||
|
django-taggit==1.3.0
|
||||||
dateparser
|
dateparser
|
||||||
ipython
|
ipython
|
||||||
youtube-dl
|
youtube-dl
|
||||||
|
|
|
@ -66,6 +66,12 @@ class SnapshotAdmin(admin.ModelAdmin):
|
||||||
actions = [delete_snapshots, overwrite_snapshots, update_snapshots, update_titles, verify_snapshots]
|
actions = [delete_snapshots, overwrite_snapshots, update_snapshots, update_titles, verify_snapshots]
|
||||||
actions_template = 'admin/actions_as_select.html'
|
actions_template = 'admin/actions_as_select.html'
|
||||||
|
|
||||||
|
def get_queryset(self, request):
    # Start from the admin's default queryset and prefetch the `tags`
    # relation so that reading each row's tags does not need its own query.
    base_queryset = super().get_queryset(request)
    return base_queryset.prefetch_related('tags')
|
||||||
|
|
||||||
|
def tag_list(self, obj):
    # Render the names of every tag attached to `obj` as one
    # comma-and-space separated string.
    tag_names = [tag.name for tag in obj.tags.all()]
    return u", ".join(tag_names)
|
||||||
|
|
||||||
def id_str(self, obj):
|
def id_str(self, obj):
|
||||||
return format_html(
|
return format_html(
|
||||||
'<code style="font-size: 10px">{}</code>',
|
'<code style="font-size: 10px">{}</code>',
|
||||||
|
@ -75,9 +81,9 @@ class SnapshotAdmin(admin.ModelAdmin):
|
||||||
def title_str(self, obj):
|
def title_str(self, obj):
|
||||||
canon = obj.as_link().canonical_outputs()
|
canon = obj.as_link().canonical_outputs()
|
||||||
tags = ''.join(
|
tags = ''.join(
|
||||||
format_html('<span>{}</span>', tag.strip())
|
format_html(' <span>{}</span> ', tag)
|
||||||
for tag in obj.tags.split(',')
|
for tag in obj.tags.all()
|
||||||
) if obj.tags else ''
|
) if obj.tags.all() else ''
|
||||||
return format_html(
|
return format_html(
|
||||||
'<a href="/{}">'
|
'<a href="/{}">'
|
||||||
'<img src="/{}/{}" class="favicon" onerror="this.remove()">'
|
'<img src="/{}/{}" class="favicon" onerror="this.remove()">'
|
||||||
|
|
89
archivebox/core/migrations/0006_auto_20200915_2006.py
Normal file
89
archivebox/core/migrations/0006_auto_20200915_2006.py
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
# Generated by Django 3.0.8 on 2020-09-15 20:06
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
from django.contrib.contenttypes.models import ContentType
|
||||||
|
from django.utils.text import slugify
|
||||||
|
import django.db.models.deletion
|
||||||
|
import taggit.managers
|
||||||
|
|
||||||
|
def forwards_func(apps, schema_editor):
    """Copy every snapshot's legacy comma-separated tags into django-taggit rows.

    Reads ``Snapshot.tags_old``, splits it on commas, strips whitespace and
    links one ``Tag`` row per non-empty name to the snapshot through
    ``core.TaggedItem``.

    Args:
        apps: historical app registry supplied by ``migrations.RunPython``.
        schema_editor: schema editor supplied by ``migrations.RunPython``;
            only its connection alias is used.
    """
    SnapshotModel = apps.get_model("core", "Snapshot")
    TaggedItemModel = apps.get_model("core", "TaggedItem")
    TagModel = apps.get_model("taggit", "Tag")
    # Use the historical model rather than the directly imported ContentType
    # so the migration only relies on the schema state it declares in
    # `dependencies`.
    ContentTypeModel = apps.get_model("contenttypes", "ContentType")

    db_alias = schema_editor.connection.alias
    tag_rows = TagModel.objects.using(db_alias)
    tagged_items = TaggedItemModel.objects.using(db_alias)
    snapshot_ct_id = None

    for snapshot in SnapshotModel.objects.using(db_alias).all():
        names = {name.strip() for name in (snapshot.tags_old or '').split(',')}
        # ''.split(',') yields [''], so untagged snapshots would otherwise
        # get a tag whose name is the empty string.
        names.discard('')

        # Sorted only to make the order of created rows deterministic.
        for name in sorted(names):
            if snapshot_ct_id is None:
                # Resolved lazily, and created if missing, so a database
                # without the core.snapshot content type row does not crash.
                content_type, _ = ContentTypeModel.objects.using(db_alias).get_or_create(
                    app_label="core",
                    model="snapshot",
                )
                snapshot_ct_id = content_type.id

            tag = tag_rows.filter(name=name).first()
            if tag is None:
                # Different names can slugify to the same value (non-ASCII
                # names slugify to ''), so pick a slug that is still free
                # before inserting.
                base_slug = slugify(name)
                slug = base_slug
                suffix = 0
                while tag_rows.filter(slug=slug).exists():
                    suffix += 1
                    slug = '{}_{}'.format(base_slug, suffix)
                tag = tag_rows.create(name=name, slug=slug)

            tagged_items.get_or_create(
                content_type_id=snapshot_ct_id,
                object_id=snapshot.id,
                tag=tag,
            )
|
||||||
|
|
||||||
|
|
||||||
|
def reverse_func(apps, schema_editor):
    """Undo ``forwards_func``: rebuild ``tags_old`` and drop the tag links.

    For every snapshot, the names of its linked tags are joined with commas
    and written back to ``Snapshot.tags_old``, then the snapshot's
    ``core.TaggedItem`` rows are deleted. ``Tag`` rows themselves are left
    in place, as in the previous implementation.

    Args:
        apps: historical app registry supplied by ``migrations.RunPython``.
        schema_editor: schema editor supplied by ``migrations.RunPython``;
            only its connection alias is used.
    """
    SnapshotModel = apps.get_model("core", "Snapshot")
    TaggedItemModel = apps.get_model("core", "TaggedItem")

    db_alias = schema_editor.connection.alias
    snapshots = SnapshotModel.objects.using(db_alias)

    for snapshot in snapshots.all():
        links = TaggedItemModel.objects.using(db_alias).filter(
            object_id=snapshot.id,
        )
        names = sorted({name for name in links.values_list('tag__name', flat=True) if name})
        if names:
            # Write the legacy comma-separated form back so unapplying the
            # migration does not lose the tags. `update()` touches only this
            # one column.
            snapshots.filter(pk=snapshot.pk).update(tags_old=','.join(names))
        links.delete()
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    # Converts Snapshot.tags from a plain field into a django-taggit
    # TaggableManager backed by the new core.TaggedItem through model.
    # The order of `operations` matters: the old field is renamed out of the
    # way first, the new structures are created, the data is copied by
    # `forwards_func`, and only then is the old field dropped.

    dependencies = [
        ('contenttypes', '0002_remove_content_type_name'),
        ('taggit', '0003_taggeditem_add_unique_index'),
        ('core', '0005_auto_20200728_0326'),
    ]

    operations = [
        # Keep the existing values reachable as `tags_old` so the name `tags`
        # is free for the TaggableManager added below.
        migrations.RenameField(
            model_name='snapshot',
            old_name='tags',
            new_name='tags_old',
        ),
        # Through table linking a tag to an object by content type plus a
        # UUID `object_id`.
        migrations.CreateModel(
            name='TaggedItem',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('object_id', models.UUIDField(db_index=True, verbose_name='object ID')),
                ('content_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='core_taggeditem_tagged_items', to='contenttypes.ContentType', verbose_name='content type')),
                ('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='core_taggeditem_items', to='taggit.Tag')),
            ],
            options={
                'verbose_name': 'Tag',
                'verbose_name_plural': 'Tags',
            },
        ),
        migrations.AddField(
            model_name='snapshot',
            name='tags',
            field=taggit.managers.TaggableManager(help_text='A comma-separated list of tags.', through='core.TaggedItem', to='taggit.Tag', verbose_name='Tags'),
        ),
        # Data step: forwards_func runs when applying, reverse_func when
        # unapplying. It must stay before RemoveField because forwards_func
        # reads `tags_old`.
        migrations.RunPython(forwards_func, reverse_func),
        migrations.RemoveField(
            model_name='snapshot',
            name='tags_old',
        ),
    ]
|
|
@ -5,10 +5,19 @@ import uuid
|
||||||
from django.db import models
|
from django.db import models
|
||||||
from django.utils.functional import cached_property
|
from django.utils.functional import cached_property
|
||||||
|
|
||||||
|
from taggit.managers import TaggableManager
|
||||||
|
from taggit.models import GenericUUIDTaggedItemBase, TaggedItemBase
|
||||||
|
|
||||||
from ..util import parse_date
|
from ..util import parse_date
|
||||||
from ..index.schema import Link
|
from ..index.schema import Link
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Through model passed to `TaggableManager(through=TaggedItem)` on Snapshot.
# NOTE(review): the UUID-flavoured taggit base is presumably needed because
# Snapshot.id is a UUIDField rather than an integer key; confirm against
# django-taggit's documentation.
class TaggedItem(GenericUUIDTaggedItemBase, TaggedItemBase):
    class Meta:
        # Human-readable names for this model.
        verbose_name = "Tag"
        verbose_name_plural = "Tags"
|
||||||
|
|
||||||
class Snapshot(models.Model):
|
class Snapshot(models.Model):
|
||||||
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
|
||||||
|
|
||||||
|
@ -16,7 +25,7 @@ class Snapshot(models.Model):
|
||||||
timestamp = models.CharField(max_length=32, unique=True, db_index=True)
|
timestamp = models.CharField(max_length=32, unique=True, db_index=True)
|
||||||
|
|
||||||
title = models.CharField(max_length=128, null=True, blank=True, db_index=True)
|
title = models.CharField(max_length=128, null=True, blank=True, db_index=True)
|
||||||
tags = models.CharField(max_length=256, null=True, blank=True, db_index=True)
|
tags = TaggableManager(through=TaggedItem)
|
||||||
|
|
||||||
added = models.DateTimeField(auto_now_add=True, db_index=True)
|
added = models.DateTimeField(auto_now_add=True, db_index=True)
|
||||||
updated = models.DateTimeField(null=True, blank=True, db_index=True)
|
updated = models.DateTimeField(null=True, blank=True, db_index=True)
|
||||||
|
|
|
@ -31,6 +31,7 @@ INSTALLED_APPS = [
|
||||||
'core',
|
'core',
|
||||||
|
|
||||||
'django_extensions',
|
'django_extensions',
|
||||||
|
'taggit',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -86,9 +86,16 @@ def merge_links(a: Link, b: Link) -> Link:
|
||||||
)
|
)
|
||||||
|
|
||||||
# all unique, truthy tags
|
# all unique, truthy tags
|
||||||
|
tags_a = []
|
||||||
|
if a.tags:
|
||||||
|
tags_a = a.tags.all()
|
||||||
|
tags_b = []
|
||||||
|
if b.tags:
|
||||||
|
tags_b = b.tags.all()
|
||||||
|
|
||||||
tags_set = (
|
tags_set = (
|
||||||
set(tag.strip() for tag in (a.tags or '').split(','))
|
set(tag.name.strip() for tag in tags_a)
|
||||||
| set(tag.strip() for tag in (b.tags or '').split(','))
|
| set(tag.name.strip() for tag in tags_b)
|
||||||
)
|
)
|
||||||
tags = ','.join(tags_set) or None
|
tags = ','.join(tags_set) or None
|
||||||
|
|
||||||
|
|
|
@ -157,7 +157,8 @@ class Link:
|
||||||
assert isinstance(self.url, str) and '://' in self.url
|
assert isinstance(self.url, str) and '://' in self.url
|
||||||
assert self.updated is None or isinstance(self.updated, datetime)
|
assert self.updated is None or isinstance(self.updated, datetime)
|
||||||
assert self.title is None or (isinstance(self.title, str) and self.title)
|
assert self.title is None or (isinstance(self.title, str) and self.title)
|
||||||
assert self.tags is None or isinstance(self.tags, str)
|
#for tag in self.tags.all():
|
||||||
|
# assert tag is None or isinstance(tag, TaggedItem)
|
||||||
assert isinstance(self.sources, list)
|
assert isinstance(self.sources, list)
|
||||||
assert all(isinstance(source, str) and source for source in self.sources)
|
assert all(isinstance(source, str) and source for source in self.sources)
|
||||||
assert isinstance(self.history, dict)
|
assert isinstance(self.history, dict)
|
||||||
|
|
|
@ -65,7 +65,14 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
|
||||||
except Snapshot.DoesNotExist:
|
except Snapshot.DoesNotExist:
|
||||||
snap = write_link_to_sql_index(link)
|
snap = write_link_to_sql_index(link)
|
||||||
snap.title = link.title
|
snap.title = link.title
|
||||||
snap.tags = link.tags
|
|
||||||
|
tag_set = (
|
||||||
|
set(tag.strip() for tag in (link.tags or '').split(','))
|
||||||
|
)
|
||||||
|
tag_list = list(tag_set) or []
|
||||||
|
|
||||||
|
for tag in tag_list:
|
||||||
|
snap.tags.add(tag)
|
||||||
snap.save()
|
snap.save()
|
||||||
|
|
||||||
|
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -80,6 +80,7 @@ setuptools.setup(
|
||||||
"base32-crockford==0.3.0",
|
"base32-crockford==0.3.0",
|
||||||
"django==3.0.8",
|
"django==3.0.8",
|
||||||
"django-extensions==3.0.3",
|
"django-extensions==3.0.3",
|
||||||
|
"django-taggit==1.3.0",
|
||||||
|
|
||||||
"dateparser",
|
"dateparser",
|
||||||
"ipython",
|
"ipython",
|
||||||
|
|
BIN
tests/tags_migration/index.sqlite3
Executable file
BIN
tests/tags_migration/index.sqlite3
Executable file
Binary file not shown.
|
@ -4,7 +4,7 @@
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import json
|
import json, shutil
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
|
||||||
from archivebox.config import OUTPUT_PERMISSIONS
|
from archivebox.config import OUTPUT_PERMISSIONS
|
||||||
|
@ -131,4 +131,44 @@ def test_unrecognized_folders(tmp_path, process, disable_extractors_dict):
|
||||||
|
|
||||||
init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
|
init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
|
||||||
assert "Skipped adding 1 invalid link data directories" in init_process.stdout.decode("utf-8")
|
assert "Skipped adding 1 invalid link data directories" in init_process.stdout.decode("utf-8")
|
||||||
assert init_process.returncode == 0
|
assert init_process.returncode == 0
|
||||||
|
|
||||||
|
def test_tags_migration(tmp_path, disable_extractors_dict):
    """Check that `archivebox init` migrates legacy snapshot tags to tag rows.

    Copies the fixture database in ``tests/tags_migration`` into ``tmp_path``,
    records each snapshot's comma-separated ``tags`` value, runs
    ``archivebox init`` and then compares the rows joined from
    ``core_taggeditem`` / ``taggit_tag`` with the recorded values.
    """
    # Local import so this block does not depend on the file's import header.
    from contextlib import closing

    base_sqlite_path = Path(__file__).parent / 'tags_migration'

    if os.path.exists(tmp_path):
        shutil.rmtree(tmp_path)
    shutil.copytree(str(base_sqlite_path), tmp_path)
    os.chdir(tmp_path)

    # closing() guarantees the connection is released even if a query fails.
    with closing(sqlite3.connect("index.sqlite3")) as conn:
        conn.row_factory = sqlite3.Row
        legacy_rows = conn.execute("SELECT id, tags from core_snapshot").fetchall()

    # Parse the legacy column into exact tag names. A NULL column counts as
    # an empty string instead of raising on the membership test below.
    legacy_tags = {
        row['id']: {name.strip() for name in (row['tags'] or '').split(',')}
        for row in legacy_rows
    }

    init_process = subprocess.run(['archivebox', 'init'], capture_output=True, env=disable_extractors_dict)
    # Without this check a failed init leaves nothing to iterate over and
    # the test would pass without verifying anything.
    assert init_process.returncode == 0, (
        init_process.stdout.decode("utf-8") + init_process.stderr.decode("utf-8")
    )

    with closing(sqlite3.connect("index.sqlite3")) as conn:
        conn.row_factory = sqlite3.Row
        migrated_rows = conn.execute("""
            SELECT snapshot.id snapshot, tags.name tag
            FROM core_snapshot snapshot, core_taggeditem snapshot_tagged, taggit_tag tags
            WHERE
                snapshot.id = snapshot_tagged.object_id
                AND tags.id = snapshot_tagged.tag_id
        """).fetchall()

    migrated_tags = {}
    for row in migrated_rows:
        snapshot_id = row['snapshot']
        tag_name = row['tag']
        # Check each tag migrated is in the previous field (exact name match,
        # not a substring match against the raw string).
        assert tag_name in legacy_tags[snapshot_id]
        migrated_tags.setdefault(snapshot_id, set()).add(tag_name)

    # Check the other direction too: every non-empty legacy tag was migrated.
    for snapshot_id, names in legacy_tags.items():
        assert names - {''} <= migrated_tags.get(snapshot_id, set())
|
Loading…
Reference in a new issue