refactor: Remove django-taggit and replace it with a local tags setup

This commit is contained in:
Cristian 2020-10-12 13:47:03 -05:00
parent 10384a8a6f
commit 62c78e1d10
11 changed files with 313 additions and 112 deletions

View file

@ -4,7 +4,6 @@ mypy-extensions==0.4.3
base32-crockford==0.3.0
django==3.0.8
django-extensions==3.0.3
django-taggit==1.3.0
dateparser
ipython
youtube-dl

View file

@ -9,9 +9,10 @@ from django.utils.html import format_html
from django.utils.safestring import mark_safe
from django.shortcuts import render, redirect
from django.contrib.auth import get_user_model
from django import forms
from core.models import Snapshot
from core.forms import AddLinkForm
from core.forms import AddLinkForm, TagField
from core.utils import get_icons
from util import htmldecode, urldecode, ansi_to_html
@ -55,6 +56,32 @@ def delete_snapshots(modeladmin, request, queryset):
delete_snapshots.short_description = "Delete"
class SnapshotAdminForm(forms.ModelForm):
    """Admin form for ``Snapshot`` that edits tags through a ``TagField``.

    ``TagField`` cleans its input into a list of tag names, which this form
    hands to ``instance.save_tags`` instead of letting the default
    many-to-many handling store them.
    """

    tags = TagField(required=False)

    class Meta:
        model = Snapshot
        fields = "__all__"

    def save(self, commit=True):
        """Save the snapshot and store (or defer storing) its tags.

        Based on: https://stackoverflow.com/a/49933068/3509554

        Args:
            commit: When true, the instance and its tags are written
                immediately. When false, only the unsaved instance is built
                and the caller is expected to call ``self.save_m2m()`` after
                saving it.

        Returns:
            The ``Snapshot`` instance bound to this form.
        """
        # Get the unsaved instance; with commit=False the parent also
        # installs its default ``save_m2m`` on the form.
        instance = super().save(commit=False)

        # Remove the tag names from cleaned_data so the default
        # many-to-many saving skips the ``tags`` field.
        tags = self.cleaned_data.pop("tags")
        default_save_m2m = self.save_m2m

        def new_save_m2m():
            # Keep the stock behaviour for any other relations, then
            # store the tags through the model's own helper.
            default_save_m2m()
            instance.save_tags(tags)

        self.save_m2m = new_save_m2m

        if commit:
            instance.save()
            # Without this call a direct ``form.save()`` would drop the tags.
            self.save_m2m()
        return instance
class SnapshotAdmin(admin.ModelAdmin):
list_display = ('added', 'title_str', 'url_str', 'files', 'size')
sort_fields = ('title_str', 'url_str', 'added')
@ -65,6 +92,7 @@ class SnapshotAdmin(admin.ModelAdmin):
ordering = ['-added']
actions = [delete_snapshots, overwrite_snapshots, update_snapshots, update_titles, verify_snapshots]
actions_template = 'admin/actions_as_select.html'
form = SnapshotAdminForm
def get_queryset(self, request):
    """Return the parent class's queryset with ``tags`` prefetched."""
    base_queryset = super().get_queryset(request)
    return base_queryset.prefetch_related('tags')

View file

@ -3,6 +3,7 @@ __package__ = 'archivebox.core'
from django import forms
from ..util import URL_REGEX
from .utils_taggit import edit_string_for_tags, parse_tags
CHOICES = (
('0', 'depth = 0 (archive just these URLs)'),
@ -12,3 +13,44 @@ CHOICES = (
class AddLinkForm(forms.Form):
    """Form with a multi-line URL field and an archive-depth choice."""

    url = forms.RegexField(
        label="URLs (one per line)",
        regex=URL_REGEX,
        min_length='6',
        strip=True,
        widget=forms.Textarea,
        required=True,
    )
    depth = forms.ChoiceField(
        label="Archive depth",
        choices=CHOICES,
        widget=forms.RadioSelect,
        initial='0',
    )
class TagWidgetMixin:
    """Widget mixin that turns a collection of tags into editable text."""

    def format_value(self, value):
        """Format ``value`` for display, converting tag collections first.

        ``None`` and ``str`` values go to the parent formatter untouched;
        any other value is first converted with ``edit_string_for_tags``.
        """
        passthrough = value is None or isinstance(value, str)
        if not passthrough:
            value = edit_string_for_tags(value)
        return super().format_value(value)
class TagWidget(TagWidgetMixin, forms.TextInput):
    """Text input that formats its value through ``TagWidgetMixin``."""
class TagField(forms.CharField):
    """Character field whose cleaned value is the result of ``parse_tags``."""

    widget = TagWidget

    def clean(self, value):
        """Run the normal ``CharField`` cleaning, then parse the tags.

        Raises:
            forms.ValidationError: If ``parse_tags`` raises ``ValueError``.
        """
        cleaned = super().clean(value)
        try:
            tag_names = parse_tags(cleaned)
        except ValueError:
            raise forms.ValidationError(
                "Please provide a comma-separated list of tags."
            )
        return tag_names

    def has_changed(self, initial_value, data_value):
        """Tell whether the submitted tags differ from the initial ones.

        ``initial_value`` is expected to hold objects with a ``name``
        attribute (or be ``None``); ``data_value`` is the raw submitted text.
        """
        # A disabled field always reports "unchanged" because
        # self.bound_data uses the initial value in that case.
        if self.disabled:
            return False

        try:
            data_value = self.clean(data_value)
        except forms.ValidationError:
            # Invalid input is compared in its raw form below.
            pass

        existing = [] if initial_value is None else initial_value
        existing_names = sorted(tag.name for tag in existing)
        return existing_names != data_value

View file

@ -1,90 +0,0 @@
# Generated by Django 3.0.8 on 2020-09-15 20:06
from django.db import migrations, models
from django.contrib.contenttypes.models import ContentType
from django.utils.text import slugify
import django.db.models.deletion
import taggit.managers
def forwards_func(apps, schema_editor):
SnapshotModel = apps.get_model("core", "Snapshot")
TaggedItemModel = apps.get_model("core", "TaggedItem")
TagModel = apps.get_model("taggit", "Tag")
contents = ContentType.objects.all()
try:
ct = ContentType.objects.filter(app_label="core", model="snapshot")
except model.DoesNotExist: # Be explicit about exceptions
ct = None
db_alias = schema_editor.connection.alias
snapshots = SnapshotModel.objects.all()
for snapshot in snapshots:
tags = snapshot.tags
tag_set = (
set(tag.strip() for tag in (snapshot.tags_old or '').split(','))
)
tag_set.discard("")
for tag in tag_set:
new_tag, created = TagModel.objects.get_or_create(name=tag, slug=slugify(tag))
TaggedItemModel.objects.get_or_create(
content_type_id=ct[0].id,
object_id=snapshot.id,
tag=new_tag
)
def reverse_func(apps, schema_editor):
SnapshotModel = apps.get_model("core", "Snapshot")
TaggedItemModel = apps.get_model("core", "TaggedItem")
TagModel = apps.get_model("taggit", "Tag")
ct = ContentType.objects.get(app_label="core", model="snapshot")
db_alias = schema_editor.connection.alias
snapshots = SnapshotModel.objects.all()
for snapshot in snapshots:
tags = TaggedItemModel.objects.filter(
object_id=snapshot.id,
)
snapshot.tags_old = ",".join([tag.tag.name for tag in tags])
snapshot.save()
class Migration(migrations.Migration):
dependencies = [
('contenttypes', '0002_remove_content_type_name'),
('taggit', '0003_taggeditem_add_unique_index'),
('core', '0005_auto_20200728_0326'),
]
operations = [
migrations.RenameField(
model_name='snapshot',
old_name='tags',
new_name='tags_old',
),
migrations.CreateModel(
name='TaggedItem',
fields=[
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('object_id', models.UUIDField(db_index=True, verbose_name='object ID')),
('content_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='core_taggeditem_tagged_items', to='contenttypes.ContentType', verbose_name='content type')),
('tag', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='core_taggeditem_items', to='taggit.Tag')),
],
options={
'verbose_name': 'Tag',
'verbose_name_plural': 'Tags',
},
),
migrations.AddField(
model_name='snapshot',
name='tags',
field=taggit.managers.TaggableManager(help_text='A comma-separated list of tags.', through='core.TaggedItem', to='taggit.Tag', verbose_name='Tags'),
),
migrations.RunPython(forwards_func, reverse_func),
migrations.RemoveField(
model_name='snapshot',
name='tags_old',
),
]

View file

@ -0,0 +1,70 @@
# Generated by Django 3.0.8 on 2020-10-12 15:20
from django.db import migrations, models
from django.utils.text import slugify
def forwards_func(apps, schema_editor):
    """Copy each snapshot's comma-separated ``tags_old`` text into ``Tag`` rows.

    For every snapshot the legacy string is split on commas, the pieces are
    stripped and de-duplicated, and each resulting name is attached to the
    snapshot through the new ``tags`` relation, creating the ``Tag`` row when
    it does not exist yet.

    Args:
        apps: Historical app registry supplied by the migration runner.
        schema_editor: Schema editor supplied by the migration runner (unused).
    """
    SnapshotModel = apps.get_model("core", "Snapshot")
    TagModel = apps.get_model("core", "Tag")

    for snapshot in SnapshotModel.objects.all():
        tag_names = {
            name.strip() for name in (snapshot.tags_old or '').split(',')
        }
        tag_names.discard("")

        # Sorted so that slug suffixes are assigned deterministically.
        for name in sorted(tag_names):
            tag = TagModel.objects.filter(name=name).first()
            if tag is None:
                # Historical models do not run the model's custom save(), so
                # a free slug has to be picked here. Different names can
                # slugify to the same text, and the slug column is unique.
                base_slug = slugify(name)
                slug = base_slug
                suffix = 0
                while TagModel.objects.filter(slug=slug).exists():
                    suffix += 1
                    slug = "%s_%d" % (base_slug, suffix)
                tag = TagModel.objects.create(name=name, slug=slug)
            snapshot.tags.add(tag)
def reverse_func(apps, schema_editor):
    """Write each snapshot's tag names back into the ``tags_old`` text field.

    The names of the tags related to a snapshot are joined with commas and
    stored on ``tags_old``, then the snapshot is saved.

    Args:
        apps: Historical app registry supplied by the migration runner.
        schema_editor: Schema editor supplied by the migration runner (unused).
    """
    SnapshotModel = apps.get_model("core", "Snapshot")

    for snapshot in SnapshotModel.objects.all():
        names = snapshot.tags.values_list("name", flat=True)
        snapshot.tags_old = ",".join(names)
        snapshot.save()
class Migration(migrations.Migration):
    """Swap ``Snapshot.tags`` for a many-to-many relation to a new ``Tag`` model."""

    dependencies = [
        ("core", "0005_auto_20200728_0326"),
    ]

    operations = [
        # Keep the existing field under a temporary name while converting.
        migrations.RenameField(
            model_name="snapshot",
            old_name="tags",
            new_name="tags_old",
        ),
        migrations.CreateModel(
            name="Tag",
            fields=[
                (
                    "id",
                    models.AutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                (
                    "name",
                    models.CharField(
                        max_length=100, unique=True, verbose_name="name"
                    ),
                ),
                (
                    "slug",
                    models.SlugField(
                        max_length=100, unique=True, verbose_name="slug"
                    ),
                ),
            ],
            options={
                "verbose_name": "Tag",
                "verbose_name_plural": "Tags",
            },
        ),
        migrations.AddField(
            model_name="snapshot",
            name="tags",
            field=models.ManyToManyField(to="core.Tag"),
        ),
        # Move the data across (and back again when reversing).
        migrations.RunPython(forwards_func, reverse_func),
        # The temporary field is dropped once its data has been copied.
        migrations.RemoveField(
            model_name="snapshot",
            name="tags_old",
        ),
    ]

View file

@ -2,22 +2,55 @@ __package__ = 'archivebox.core'
import uuid
from django.db import models
from django.db import models, transaction
from django.utils.functional import cached_property
from taggit.managers import TaggableManager
from taggit.models import GenericUUIDTaggedItemBase, TaggedItemBase
from django.utils.text import slugify
from ..util import parse_date
from ..index.schema import Link
class Tag(models.Model):
"""
Based on django-taggit model
"""
name = models.CharField(verbose_name="name", unique=True, blank=False, max_length=100)
slug = models.SlugField(verbose_name="slug", unique=True, max_length=100)
class TaggedItem(GenericUUIDTaggedItemBase, TaggedItemBase):
class Meta:
verbose_name = "Tag"
verbose_name_plural = "Tags"
def __str__(self):
    """Return the tag's name as its string representation."""
    return self.name
def slugify(self, tag, i=None):
    """Return the slug for ``tag``, with ``_<i>`` appended when ``i`` is given.

    The base slug comes from the module-level ``slugify`` function.
    """
    base = slugify(tag)
    if i is None:
        return base
    return base + "_%d" % i
def save(self, *args, **kwargs):
    """Save the tag, choosing an unused slug for new tags that lack one.

    When the instance is being added and has no slug, the slug is derived
    from the name; if that slug is already present among the stored slugs
    sharing its prefix, ``_1``, ``_2``, ... are tried until one is free.
    In every other case the tag is saved as-is.
    """
    needs_slug = self._state.adding and not self.slug
    if not needs_slug:
        return super().save(*args, **kwargs)

    self.slug = self.slugify(self.name)
    with transaction.atomic():
        manager = type(self)._default_manager
        taken = set(
            manager.filter(slug__startswith=self.slug)
            .values_list("slug", flat=True)
        )
        # First try the bare slug (no suffix), then 1, 2, 3, ...
        suffix = None
        candidate = self.slugify(self.name, suffix)
        while candidate in taken:
            suffix = 1 if suffix is None else suffix + 1
            candidate = self.slugify(self.name, suffix)
        self.slug = candidate
        return super().save(*args, **kwargs)
class Snapshot(models.Model):
id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
@ -25,11 +58,10 @@ class Snapshot(models.Model):
timestamp = models.CharField(max_length=32, unique=True, db_index=True)
title = models.CharField(max_length=128, null=True, blank=True, db_index=True)
tags = TaggableManager(through=TaggedItem)
added = models.DateTimeField(auto_now_add=True, db_index=True)
updated = models.DateTimeField(null=True, blank=True, db_index=True)
# bookmarked = models.DateTimeField()
tags = models.ManyToManyField(Tag)
keys = ('url', 'timestamp', 'title', 'tags', 'updated')
@ -113,3 +145,10 @@ class Snapshot(models.Model):
and self.history['title'][-1].output.strip()):
return self.history['title'][-1].output.strip()
return None
def save_tags(self, tags=()):
    """Replace this snapshot's tags with the given tag names.

    Every name is looked up with ``Tag.objects.get_or_create``; the current
    tag relations are then cleared and the resulting tags attached.

    Args:
        tags: Iterable of tag names. ``None`` is treated as "no tags". A
            single string is treated as a comma-separated list of names
            rather than being iterated character by character.
    """
    if tags is None:
        tags = ()
    elif isinstance(tags, str):
        pieces = (piece.strip() for piece in tags.split(","))
        tags = [piece for piece in pieces if piece]

    tag_ids = [Tag.objects.get_or_create(name=name)[0].id for name in tags]
    self.tags.clear()
    self.tags.add(*tag_ids)

View file

@ -31,7 +31,6 @@ INSTALLED_APPS = [
'core',
'django_extensions',
'taggit',
]

View file

@ -0,0 +1,113 @@
# Taken from https://github.com/jazzband/django-taggit/blob/3b56adb637ab95aca5036c37a358402c825a367c/taggit/utils.py
def parse_tags(tagstring):
    """
    Split user-entered tag text into a sorted list of unique tag names.

    Text inside double quotes is taken as one tag (after stripping) and may
    contain commas. Text outside quotes is split on commas if a comma occurs
    outside quotes anywhere in the input (or inside an unclosed quote),
    otherwise on spaces. Empty input yields an empty list.

    Ported from Jonathan Buchanan's `django-tagging
    <http://django-tagging.googlecode.com/>`_
    """
    if not tagstring:
        return []

    # Fast path: with neither commas nor quotes, spaces are the only
    # possible delimiter.
    if "," not in tagstring and '"' not in tagstring:
        return sorted(set(split_strip(tagstring, " ")))

    words = []
    # Unquoted stretches are split only once the delimiter is known.
    loose_chunks = []
    comma_delimited = False

    cursor = 0
    length = len(tagstring)
    while cursor < length:
        quote_at = tagstring.find('"', cursor)
        if quote_at == -1:
            # No more quotes: the rest of the input is unquoted text.
            loose = tagstring[cursor:]
            if "," in loose:
                comma_delimited = True
            loose_chunks.append(loose)
            break

        loose = tagstring[cursor:quote_at]
        if loose:
            if "," in loose:
                comma_delimited = True
            loose_chunks.append(loose)

        closing = tagstring.find('"', quote_at + 1)
        if closing == -1:
            # A quote that is never closed: treat what follows as unquoted.
            tail = tagstring[quote_at + 1:]
            if tail:
                if "," in tail:
                    comma_delimited = True
                loose_chunks.append(tail)
            break

        phrase = tagstring[quote_at + 1:closing].strip()
        if phrase:
            words.append(phrase)
        cursor = closing + 1

    if loose_chunks:
        delimiter = "," if comma_delimited else " "
        for chunk in loose_chunks:
            words.extend(split_strip(chunk, delimiter))

    return sorted(set(words))
def split_strip(string, delimiter=","):
    """
    Split ``string`` on ``delimiter`` and return the stripped, non-empty
    pieces in their original order. A falsy ``string`` gives an empty list.

    Ported from Jonathan Buchanan's `django-tagging
    <http://django-tagging.googlecode.com/>`_
    """
    if not string:
        return []

    pieces = []
    for raw in string.split(delimiter):
        piece = raw.strip()
        if piece:
            pieces.append(piece)
    return pieces
def edit_string_for_tags(tags):
    """
    Build the editable text form of an iterable of objects with a ``name``.

    Names containing a comma or a space are wrapped in double quotes; the
    resulting names are sorted and joined with ``", "``.

    Ported from Jonathan Buchanan's `django-tagging
    <http://django-tagging.googlecode.com/>`_
    """
    rendered = []
    for name in (tag.name for tag in tags):
        needs_quotes = "," in name or " " in name
        rendered.append('"%s"' % name if needs_quotes else name)
    rendered.sort()
    return ", ".join(rendered)

View file

@ -34,14 +34,19 @@ def remove_from_sql_main_index(snapshots: QuerySet, out_dir: Path=OUTPUT_DIR) ->
def write_link_to_sql_index(link: Link):
from core.models import Snapshot
info = {k: v for k, v in link._asdict().items() if k in Snapshot.keys}
tags = info.pop("tags")
if tags is None:
tags = []
try:
info["timestamp"] = Snapshot.objects.get(url=link.url).timestamp
except Snapshot.DoesNotExist:
while Snapshot.objects.filter(timestamp=info["timestamp"]).exists():
info["timestamp"] = str(float(info["timestamp"]) + 1.0)
Snapshot.objects.update_or_create(url=link.url, defaults=info)
return Snapshot.objects.get(url=link.url)
snapshot, _ = Snapshot.objects.update_or_create(url=link.url, defaults=info)
snapshot.save_tags(tags)
return snapshot
@enforce_types
@ -72,9 +77,8 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR) -> None:
)
tag_list = list(tag_set) or []
for tag in tag_list:
snap.tags.add(tag)
snap.save()
snap.save_tags(tag_list)

View file

@ -80,7 +80,6 @@ setuptools.setup(
"base32-crockford==0.3.0",
"django==3.0.8",
"django-extensions==3.0.3",
"django-taggit==1.3.0",
"dateparser",
"ipython",

View file

@ -157,18 +157,16 @@ def test_tags_migration(tmp_path, disable_extractors_dict):
conn.row_factory = sqlite3.Row
c = conn.cursor()
c.execute("""
SELECT snapshot.id snapshot, tags.name tag
FROM core_snapshot snapshot, core_taggeditem snapshot_tagged, taggit_tag tags
WHERE
snapshot.id = snapshot_tagged.object_id
AND tags.id = snapshot_tagged.tag_id
SELECT core_snapshot.id, core_tag.name from core_snapshot
JOIN core_snapshot_tags on core_snapshot_tags.snapshot_id=core_snapshot.id
JOIN core_tag on core_tag.id=core_snapshot_tags.tag_id
""")
tags = c.fetchall()
conn.commit()
conn.close()
for tag in tags:
snapshot_id = tag['snapshot']
tag_name = tag['tag']
snapshot_id = tag["id"]
tag_name = tag["name"]
# Check each tag migrated is in the previous field
assert tag_name in snapshots_dict[snapshot_id]