Sanitize RSS feed to remove control characters (#565)

This commit is contained in:
Sascha Ißbrücker 2023-10-27 19:59:06 +02:00 committed by GitHub
parent 314e4a9b74
commit de328c78e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 18 additions and 3 deletions

View file

@ -1,11 +1,12 @@
import unicodedata
from dataclasses import dataclass
from django.contrib.syndication.views import Feed
from django.db.models import QuerySet
from django.urls import reverse
from bookmarks.models import Bookmark, BookmarkSearch, FeedToken
from bookmarks import queries
from bookmarks.models import Bookmark, BookmarkSearch, FeedToken
@dataclass
@ -14,6 +15,12 @@ class FeedContext:
query_set: QuerySet[Bookmark]
def sanitize(text: str):
    """Strip characters from *text* that must not end up in the feed XML.

    Removed are:
    - control characters (Unicode category ``Cc``) other than newline,
      carriage return and tab,
    - lone surrogates (category ``Cs``),
    - the noncharacters U+FFFE and U+FFFF.

    Everything else is kept. In particular format characters (``Cf``, e.g.
    the zero-width joiner used inside emoji sequences), private-use
    characters (``Co``) and code points that the interpreter's Unicode
    database does not know yet (``Cn``) pass through unchanged, so visible
    text is not altered depending on the Python version in use.

    Returns an empty string when *text* is ``None`` or empty.
    """
    if not text:
        return ''

    whitespace = {'\n', '\r', '\t'}
    noncharacters = {'\ufffe', '\uffff'}

    def is_allowed(ch: str) -> bool:
        if ch in whitespace:
            return True
        if ch in noncharacters:
            return False
        # Compare the full two-letter category: checking only the leading
        # 'C' would also drop Cf / Co / Cn characters, which are legal XML.
        return unicodedata.category(ch) not in ('Cc', 'Cs')

    return ''.join(ch for ch in text if is_allowed(ch))
class BaseBookmarksFeed(Feed):
def get_object(self, request, feed_key: str):
feed_token = FeedToken.objects.get(key__exact=feed_key)
@ -22,10 +29,10 @@ class BaseBookmarksFeed(Feed):
return FeedContext(feed_token, query_set)
def item_title(self, item: Bookmark):
    """Return the bookmark's resolved title, passed through ``sanitize``.

    The value is sanitized before it is returned; an earlier unsanitized
    ``return`` would make the sanitizing call unreachable.
    """
    return sanitize(item.resolved_title)
def item_description(self, item: Bookmark):
    """Return the bookmark's resolved description, passed through ``sanitize``.

    The value is sanitized before it is returned; an earlier unsanitized
    ``return`` would make the sanitizing call unreachable.
    """
    return sanitize(item.resolved_description)
# Link of a feed entry: the URL stored on the bookmark itself, returned as-is.
def item_link(self, item: Bookmark):
return item.url

View file

@ -104,6 +104,14 @@ class FeedsTestCase(TestCase, BookmarkFactoryMixin):
self.assertContains(response, '<item>', count=0)
def test_strip_control_characters(self):
    """Control characters are dropped from feed titles and descriptions.

    The bookmark's title and description contain a NUL and a backspace
    character next to newline, carriage return and tab. The rendered feed
    must contain exactly one item whose title and description keep the
    three whitespace characters but not the other two.
    """
    self.setup_bookmark(title='test\n\r\t\0\x08title', description='test\n\r\t\0\x08description')
    response = self.client.get(reverse('bookmarks:feeds.all', args=[self.token.key]))

    self.assertEqual(response.status_code, 200)
    self.assertContains(response, '<item>', count=1)
    # Plain string literals: there is nothing to interpolate here.
    self.assertContains(response, '<title>test\n\r\ttitle</title>', count=1)
    self.assertContains(response, '<description>test\n\r\tdescription</description>', count=1)
def test_unread_returns_404_for_unknown_feed_token(self):
response = self.client.get(reverse('bookmarks:feeds.unread', args=['foo']))