From 5e8f5b2c588b6ad537cec100b9d18409149608a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?= Date: Sun, 7 Apr 2024 18:13:28 +0200 Subject: [PATCH] Truncate snapshot filename for long URLs (#687) --- bookmarks/services/tasks.py | 10 ++++++++++ bookmarks/tests/test_bookmarks_tasks.py | 15 +++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/bookmarks/services/tasks.py b/bookmarks/services/tasks.py index 2f1caad..6d24ba2 100644 --- a/bookmarks/services/tasks.py +++ b/bookmarks/services/tasks.py @@ -239,6 +239,9 @@ def create_html_snapshot(bookmark: Bookmark): asset.save() +MAX_SNAPSHOT_FILENAME_LENGTH = 192 + + def _generate_snapshot_filename(asset: BookmarkAsset) -> str: def sanitize_char(char): if char.isalnum() or char in ("-", "_", "."): @@ -249,6 +252,13 @@ def _generate_snapshot_filename(asset: BookmarkAsset) -> str: formatted_datetime = asset.date_created.strftime("%Y-%m-%d_%H%M%S") sanitized_url = "".join(sanitize_char(char) for char in asset.bookmark.url) + # Calculate the length of the non-URL parts of the filename + non_url_length = len(f"{asset.asset_type}{formatted_datetime}__.html.gz") + # Calculate the maximum length for the URL part + max_url_length = MAX_SNAPSHOT_FILENAME_LENGTH - non_url_length + # Truncate the URL if necessary + sanitized_url = sanitized_url[:max_url_length] + return f"{asset.asset_type}_{formatted_datetime}_{sanitized_url}.html.gz" diff --git a/bookmarks/tests/test_bookmarks_tasks.py b/bookmarks/tests/test_bookmarks_tasks.py index e36183e..5040caa 100644 --- a/bookmarks/tests/test_bookmarks_tasks.py +++ b/bookmarks/tests/test_bookmarks_tasks.py @@ -556,6 +556,21 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin): self.assertEqual(asset.file, expected_filename) self.assertTrue(asset.gzip) + @override_settings(LD_ENABLE_SNAPSHOTS=True) + def test_create_html_snapshot_truncate_filename(self): + # Create a bookmark with a very long URL + long_url = "http://" + "a" * 300 + ".com" + bookmark = self.setup_bookmark(url=long_url) + + tasks.create_html_snapshot(bookmark) + BookmarkAsset.objects.get(bookmark=bookmark) + + # Run periodic task to process the snapshot + tasks._schedule_html_snapshots_task() + + asset = BookmarkAsset.objects.get(bookmark=bookmark) + self.assertEqual(len(asset.file), 192) + @override_settings(LD_ENABLE_SNAPSHOTS=True) def test_create_html_snapshot_should_handle_error(self): bookmark = self.setup_bookmark(url="https://example.com")