Truncate snapshot filename for long URLs (#687)

This commit is contained in:
Sascha Ißbrücker 2024-04-07 18:13:28 +02:00 committed by GitHub
parent d5a83722de
commit 5e8f5b2c58
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 25 additions and 0 deletions

View file

@ -239,6 +239,9 @@ def create_html_snapshot(bookmark: Bookmark):
asset.save()
MAX_SNAPSHOT_FILENAME_LENGTH = 192
def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
def sanitize_char(char):
if char.isalnum() or char in ("-", "_", "."):
@ -249,6 +252,13 @@ def _generate_snapshot_filename(asset: BookmarkAsset) -> str:
formatted_datetime = asset.date_created.strftime("%Y-%m-%d_%H%M%S")
sanitized_url = "".join(sanitize_char(char) for char in asset.bookmark.url)
# Calculate the length of the non-URL parts of the filename
non_url_length = len(f"{asset.asset_type}{formatted_datetime}__.html.gz")
# Calculate the maximum length for the URL part
max_url_length = MAX_SNAPSHOT_FILENAME_LENGTH - non_url_length
# Truncate the URL if necessary
sanitized_url = sanitized_url[:max_url_length]
return f"{asset.asset_type}_{formatted_datetime}_{sanitized_url}.html.gz"

View file

@ -556,6 +556,21 @@ class BookmarkTasksTestCase(TestCase, BookmarkFactoryMixin):
self.assertEqual(asset.file, expected_filename)
self.assertTrue(asset.gzip)
@override_settings(LD_ENABLE_SNAPSHOTS=True)
def test_create_html_snapshot_truncate_filename(self):
# Create a bookmark with a very long URL
long_url = "http://" + "a" * 300 + ".com"
bookmark = self.setup_bookmark(url=long_url)
tasks.create_html_snapshot(bookmark)
BookmarkAsset.objects.get(bookmark=bookmark)
# Run periodic task to process the snapshot
tasks._schedule_html_snapshots_task()
asset = BookmarkAsset.objects.get(bookmark=bookmark)
self.assertEqual(len(asset.file), 192)
@override_settings(LD_ENABLE_SNAPSHOTS=True)
def test_create_html_snapshot_should_handle_error(self):
bookmark = self.setup_bookmark(url="https://example.com")