Don't sanitize RSS content manually (fixes #5171) (#5174)

This commit is contained in:
Nutomic 2024-11-18 15:58:31 +01:00 committed by GitHub
parent 797aac7281
commit 556358f53e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 30 additions and 36 deletions

16
Cargo.lock generated
View file

@ -3796,6 +3796,16 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "quick-xml"
version = "0.37.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f22f29bdff3987b4d8632ef95fd6424ec7e4e0a57e2f4fc63e489e75357f6a03"
dependencies = [
"encoding_rs",
"memchr",
]
[[package]] [[package]]
name = "quinn" name = "quinn"
version = "0.11.5" version = "0.11.5"
@ -4158,14 +4168,14 @@ dependencies = [
[[package]] [[package]]
name = "rss" name = "rss"
version = "2.0.9" version = "2.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "27e92048f840d98c6d6dd870af9101610ea9ff413f11f1bcebf4f4c31d96d957" checksum = "554a62b3dd5450fcbb0435b3db809f9dd3c6e9f5726172408f7ad3b57ed59057"
dependencies = [ dependencies = [
"atom_syndication", "atom_syndication",
"derive_builder", "derive_builder",
"never", "never",
"quick-xml 0.36.1", "quick-xml 0.37.1",
] ]
[[package]] [[package]]

View file

@ -33,4 +33,4 @@ url = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
http.workspace = true http.workspace = true
rss = "2.0.9" rss = "2.0.10"

View file

@ -23,7 +23,7 @@ use lemmy_db_views_actor::{
use lemmy_utils::{ use lemmy_utils::{
cache_header::cache_1hour, cache_header::cache_1hour,
error::{LemmyError, LemmyErrorType, LemmyResult}, error::{LemmyError, LemmyErrorType, LemmyResult},
utils::markdown::{markdown_to_html, sanitize_html}, utils::markdown::markdown_to_html,
}; };
use rss::{ use rss::{
extension::{dublincore::DublinCoreExtension, ExtensionBuilder, ExtensionMap}, extension::{dublincore::DublinCoreExtension, ExtensionBuilder, ExtensionMap},
@ -93,23 +93,6 @@ static RSS_NAMESPACE: LazyLock<BTreeMap<String, String>> = LazyLock::new(|| {
h h
}); });
/// Strips every character that falls outside the XML 1.0 `Char` production,
/// keeping tab, line feed, carriage return and the three permitted code point
/// ranges.
/// See <https://www.w3.org/TR/xml/#NT-Char> for details.
fn sanitize_xml(input: String) -> String {
  let mut cleaned = input;
  // Filter in place; only characters the XML grammar allows survive.
  cleaned.retain(|ch| match ch {
    '\u{09}' | '\u{0A}' | '\u{0D}' => true,
    '\u{20}'..='\u{D7FF}' => true,
    '\u{E000}'..='\u{FFFD}' => true,
    '\u{10000}'..='\u{10FFFF}' => true,
    _ => false,
  });
  cleaned
}
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn get_all_feed( async fn get_all_feed(
info: web::Query<Params>, info: web::Query<Params>,
@ -278,7 +261,7 @@ async fn get_feed_user(
let items = create_post_items(posts, &context.settings().get_protocol_and_hostname())?; let items = create_post_items(posts, &context.settings().get_protocol_and_hostname())?;
let channel = Channel { let channel = Channel {
namespaces: RSS_NAMESPACE.clone(), namespaces: RSS_NAMESPACE.clone(),
title: format!("{} - {}", sanitize_xml(site_view.site.name), person.name), title: format!("{} - {}", site_view.site.name, person.name),
link: person.actor_id.to_string(), link: person.actor_id.to_string(),
items, items,
..Default::default() ..Default::default()
@ -319,7 +302,7 @@ async fn get_feed_community(
let mut channel = Channel { let mut channel = Channel {
namespaces: RSS_NAMESPACE.clone(), namespaces: RSS_NAMESPACE.clone(),
title: format!("{} - {}", sanitize_xml(site_view.site.name), community.name), title: format!("{} - {}", site_view.site.name, community.name),
link: community.actor_id.to_string(), link: community.actor_id.to_string(),
items, items,
..Default::default() ..Default::default()
@ -360,7 +343,7 @@ async fn get_feed_front(
let items = create_post_items(posts, &protocol_and_hostname)?; let items = create_post_items(posts, &protocol_and_hostname)?;
let mut channel = Channel { let mut channel = Channel {
namespaces: RSS_NAMESPACE.clone(), namespaces: RSS_NAMESPACE.clone(),
title: format!("{} - Subscribed", sanitize_xml(site_view.site.name)), title: format!("{} - Subscribed", site_view.site.name),
link: protocol_and_hostname, link: protocol_and_hostname,
items, items,
..Default::default() ..Default::default()
@ -411,7 +394,7 @@ async fn get_feed_inbox(context: &LemmyContext, jwt: &str) -> LemmyResult<Channe
let mut channel = Channel { let mut channel = Channel {
namespaces: RSS_NAMESPACE.clone(), namespaces: RSS_NAMESPACE.clone(),
title: format!("{} - Inbox", sanitize_xml(site_view.site.name)), title: format!("{} - Inbox", site_view.site.name),
link: format!("{protocol_and_hostname}/inbox"), link: format!("{protocol_and_hostname}/inbox"),
items, items,
..Default::default() ..Default::default()
@ -498,11 +481,7 @@ fn create_post_items(posts: Vec<PostView>, protocol_and_hostname: &str) -> Lemmy
for p in posts { for p in posts {
let post_url = format!("{}/post/{}", protocol_and_hostname, p.post.id); let post_url = format!("{}/post/{}", protocol_and_hostname, p.post.id);
let community_url = format!( let community_url = format!("{}/c/{}", protocol_and_hostname, &p.community.name);
"{}/c/{}",
protocol_and_hostname,
sanitize_html(&p.community.name)
);
let dublin_core_ext = Some(DublinCoreExtension { let dublin_core_ext = Some(DublinCoreExtension {
creators: vec![p.creator.actor_id.to_string()], creators: vec![p.creator.actor_id.to_string()],
..DublinCoreExtension::default() ..DublinCoreExtension::default()
@ -513,9 +492,9 @@ fn create_post_items(posts: Vec<PostView>, protocol_and_hostname: &str) -> Lemmy
}); });
let mut description = format!("submitted by <a href=\"{}\">{}</a> to <a href=\"{}\">{}</a><br>{} points | <a href=\"{}\">{} comments</a>", let mut description = format!("submitted by <a href=\"{}\">{}</a> to <a href=\"{}\">{}</a><br>{} points | <a href=\"{}\">{} comments</a>",
p.creator.actor_id, p.creator.actor_id,
sanitize_html(&p.creator.name), &p.creator.name,
community_url, community_url,
sanitize_html(&p.community.name), &p.community.name,
p.counts.score, p.counts.score,
post_url, post_url,
p.counts.comments); p.counts.comments);
@ -566,11 +545,11 @@ fn create_post_items(posts: Vec<PostView>, protocol_and_hostname: &str) -> Lemmy
}; };
let i = Item { let i = Item {
title: Some(sanitize_html(sanitize_xml(p.post.name).as_str())), title: Some(p.post.name),
pub_date: Some(p.post.published.to_rfc2822()), pub_date: Some(p.post.published.to_rfc2822()),
comments: Some(post_url.clone()), comments: Some(post_url.clone()),
guid, guid,
description: Some(sanitize_xml(description)), description: Some(description),
dublin_core_ext, dublin_core_ext,
link: Some(post_url.clone()), link: Some(post_url.clone()),
extensions, extensions,

View file

@ -259,6 +259,11 @@ mod tests {
fn test_sanitize_html() { fn test_sanitize_html() {
let sanitized = sanitize_html("<script>alert('xss');</script> hello &\"'"); let sanitized = sanitize_html("<script>alert('xss');</script> hello &\"'");
let expected = "&lt;script>alert(&#x27;xss&#x27;);&lt;/script> hello &amp;&quot;&#x27;"; let expected = "&lt;script>alert(&#x27;xss&#x27;);&lt;/script> hello &amp;&quot;&#x27;";
assert_eq!(expected, sanitized) assert_eq!(expected, sanitized);
let sanitized =
sanitize_html("Polling the group: what do y'all know about the Orion browser from Kagi?");
let expected = "Polling the group: what do y&#x27;all know about the Orion browser from Kagi?";
assert_eq!(expected, sanitized);
} }
} }