From de328c78e26be685e52a7f7dec1995e3dea9912c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sascha=20I=C3=9Fbr=C3=BCcker?=
Date: Fri, 27 Oct 2023 19:59:06 +0200
Subject: [PATCH] Sanitize RSS feed to remove control characters (#565)

---
Review notes (ignored by git am, not part of the commit message):
- sanitize() returns '' for None/empty input instead of raising TypeError
  when iterating None, and is now documented.
- Hunk offsets and the diffstat were recomputed for the longer sanitize();
  the blob ids on the "index" lines are still those of the original commit.
- The '<item>' literals in test_feeds.py are presumed: they were empty
  strings in the copy under review -- confirm against the repository.
- The From: header carries no e-mail address in the copy under review.

 bookmarks/feeds.py            | 24 +++++++++++++++++++++---
 bookmarks/tests/test_feeds.py |  8 ++++++++
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/bookmarks/feeds.py b/bookmarks/feeds.py
index 3ebc98c..ff7362a 100644
--- a/bookmarks/feeds.py
+++ b/bookmarks/feeds.py
@@ -1,11 +1,12 @@
+import unicodedata
 from dataclasses import dataclass
 
 from django.contrib.syndication.views import Feed
 from django.db.models import QuerySet
 from django.urls import reverse
 
-from bookmarks.models import Bookmark, BookmarkSearch, FeedToken
 from bookmarks import queries
+from bookmarks.models import Bookmark, BookmarkSearch, FeedToken
 
 
 @dataclass
@@ -14,6 +15,23 @@ class FeedContext:
     query_set: QuerySet[Bookmark]
 
 
+def sanitize(text: str) -> str:
+    """Return ``text`` without Unicode control characters.
+
+    Characters whose Unicode category starts with ``C`` are dropped, except
+    newline, carriage return and tab. Falsy input (``None`` or ``''``)
+    yields ``''`` instead of raising ``TypeError``.
+    """
+    if not text:
+        return ''
+    # whitespace controls that are kept even though they are category Cc
+    keep = {'\n', '\r', '\t'}
+    return ''.join(
+        ch for ch in text
+        if ch in keep or unicodedata.category(ch)[0] != 'C'
+    )
+
+
 class BaseBookmarksFeed(Feed):
     def get_object(self, request, feed_key: str):
         feed_token = FeedToken.objects.get(key__exact=feed_key)
@@ -22,10 +40,10 @@ class BaseBookmarksFeed(Feed):
         return FeedContext(feed_token, query_set)
 
     def item_title(self, item: Bookmark):
-        return item.resolved_title
+        return sanitize(item.resolved_title)
 
     def item_description(self, item: Bookmark):
-        return item.resolved_description
+        return sanitize(item.resolved_description)
 
     def item_link(self, item: Bookmark):
         return item.url
diff --git a/bookmarks/tests/test_feeds.py b/bookmarks/tests/test_feeds.py
index 4bf74ce..4d28df9 100644
--- a/bookmarks/tests/test_feeds.py
+++ b/bookmarks/tests/test_feeds.py
@@ -104,6 +104,14 @@ class FeedsTestCase(TestCase, BookmarkFactoryMixin):
 
         self.assertContains(response, '<item>', count=0)
 
+    def test_strip_control_characters(self):
+        self.setup_bookmark(title='test\n\r\t\0\x08title', description='test\n\r\t\0\x08description')
+        response = self.client.get(reverse('bookmarks:feeds.all', args=[self.token.key]))
+        self.assertEqual(response.status_code, 200)
+        self.assertContains(response, '<item>', count=1)
+        self.assertContains(response, 'test\n\r\ttitle', count=1)
+        self.assertContains(response, 'test\n\r\tdescription', count=1)
+
     def test_unread_returns_404_for_unknown_feed_token(self):
         response = self.client.get(reverse('bookmarks:feeds.unread', args=['foo']))
 