Generate post thumbnail/metadata in background (ref #4529) (#4564)

* Generate post thumbnail/metadata in background (ref #4529)

* fix api test

* Apply suggestions from code review

Co-authored-by: SleeplessOne1917 <28871516+SleeplessOne1917@users.noreply.github.com>

* fix test

---------

Co-authored-by: SleeplessOne1917 <28871516+SleeplessOne1917@users.noreply.github.com>
This commit is contained in:
Nutomic 2024-03-27 15:54:42 +01:00 committed by GitHub
parent a632a86852
commit a4b79ca610
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 135 additions and 112 deletions

View file

@ -17,13 +17,16 @@ import {
deleteAllImages, deleteAllImages,
delta, delta,
epsilon, epsilon,
followCommunity,
gamma, gamma,
getSite, getSite,
imageFetchLimit, imageFetchLimit,
registerUser, registerUser,
resolveBetaCommunity, resolveBetaCommunity,
resolveCommunity,
resolvePost, resolvePost,
setupLogins, setupLogins,
waitForPost,
unfollows, unfollows,
} from "./shared"; } from "./shared";
const downloadFileSync = require("download-file-sync"); const downloadFileSync = require("download-file-sync");
@ -209,6 +212,11 @@ test("Images in remote post are proxied if setting enabled", async () => {
test("No image proxying if setting is disabled", async () => { test("No image proxying if setting is disabled", async () => {
let user = await registerUser(beta, betaUrl); let user = await registerUser(beta, betaUrl);
let community = await createCommunity(alpha); let community = await createCommunity(alpha);
let betaCommunity = await resolveCommunity(
beta,
community.community_view.community.actor_id,
);
await followCommunity(beta, true, betaCommunity.community!.community.id);
const upload_form: UploadImage = { const upload_form: UploadImage = {
image: Buffer.from("test"), image: Buffer.from("test"),
@ -228,15 +236,19 @@ test("No image proxying if setting is disabled", async () => {
).toBeTruthy(); ).toBeTruthy();
expect(post.post_view.post.body).toBe("![](http://example.com/image2.png)"); expect(post.post_view.post.body).toBe("![](http://example.com/image2.png)");
let gammaPost = await resolvePost(delta, post.post_view.post); let betaPost = await waitForPost(
expect(gammaPost.post).toBeDefined(); beta,
post.post_view.post,
res => res?.post.alt_text != null,
);
expect(betaPost.post).toBeDefined();
// remote image doesnt get proxied after federation // remote image doesnt get proxied after federation
expect( expect(
gammaPost.post!.post.url?.startsWith("http://127.0.0.1:8551/pictrs/image/"), betaPost.post.url?.startsWith("http://127.0.0.1:8551/pictrs/image/"),
).toBeTruthy(); ).toBeTruthy();
expect(gammaPost.post!.post.body).toBe("![](http://example.com/image2.png)"); expect(betaPost.post.body).toBe("![](http://example.com/image2.png)");
// Make sure the alt text got federated // Make sure the alt text got federated
expect(post.post_view.post.alt_text).toBe(gammaPost.post!.post.alt_text); expect(post.post_view.post.alt_text).toBe(betaPost.post.alt_text);
}); });

View file

@ -55,7 +55,18 @@ afterAll(() => {
unfollows(); unfollows();
}); });
function assertPostFederation(postOne?: PostView, postTwo?: PostView) { async function assertPostFederation(postOne: PostView, postTwo: PostView) {
// Link metadata is generated in background task and may not be ready yet at this time,
// so wait for it explicitly. For removed posts we cant refetch anything.
postOne = await waitForPost(beta, postOne.post, res => {
return res === null || res?.post.embed_title !== null;
});
postTwo = await waitForPost(
beta,
postTwo.post,
res => res === null || res?.post.embed_title !== null,
);
expect(postOne?.post.ap_id).toBe(postTwo?.post.ap_id); expect(postOne?.post.ap_id).toBe(postTwo?.post.ap_id);
expect(postOne?.post.name).toBe(postTwo?.post.name); expect(postOne?.post.name).toBe(postTwo?.post.name);
expect(postOne?.post.body).toBe(postTwo?.post.body); expect(postOne?.post.body).toBe(postTwo?.post.body);
@ -109,7 +120,7 @@ test("Create a post", async () => {
expect(betaPost?.community.local).toBe(true); expect(betaPost?.community.local).toBe(true);
expect(betaPost?.creator.local).toBe(false); expect(betaPost?.creator.local).toBe(false);
expect(betaPost?.counts.score).toBe(1); expect(betaPost?.counts.score).toBe(1);
assertPostFederation(betaPost, postRes.post_view); await assertPostFederation(betaPost, postRes.post_view);
// Delta only follows beta, so it should not see an alpha ap_id // Delta only follows beta, so it should not see an alpha ap_id
await expect( await expect(
@ -157,7 +168,7 @@ test("Unlike a post", async () => {
expect(betaPost?.community.local).toBe(true); expect(betaPost?.community.local).toBe(true);
expect(betaPost?.creator.local).toBe(false); expect(betaPost?.creator.local).toBe(false);
expect(betaPost?.counts.score).toBe(0); expect(betaPost?.counts.score).toBe(0);
assertPostFederation(betaPost, postRes.post_view); await assertPostFederation(betaPost, postRes.post_view);
}); });
test("Update a post", async () => { test("Update a post", async () => {
@ -178,7 +189,7 @@ test("Update a post", async () => {
expect(betaPost.community.local).toBe(true); expect(betaPost.community.local).toBe(true);
expect(betaPost.creator.local).toBe(false); expect(betaPost.creator.local).toBe(false);
expect(betaPost.post.name).toBe(updatedName); expect(betaPost.post.name).toBe(updatedName);
assertPostFederation(betaPost, updatedPost.post_view); await assertPostFederation(betaPost, updatedPost.post_view);
// Make sure lemmy beta cannot update the post // Make sure lemmy beta cannot update the post
await expect(editPost(beta, betaPost.post)).rejects.toStrictEqual( await expect(editPost(beta, betaPost.post)).rejects.toStrictEqual(
@ -329,7 +340,7 @@ test("Delete a post", async () => {
throw "Missing beta post 2"; throw "Missing beta post 2";
} }
expect(betaPost2.post.deleted).toBe(false); expect(betaPost2.post.deleted).toBe(false);
assertPostFederation(betaPost2, undeletedPost.post_view); await assertPostFederation(betaPost2, undeletedPost.post_view);
// Make sure lemmy beta cannot delete the post // Make sure lemmy beta cannot delete the post
await expect(deletePost(beta, true, betaPost2.post)).rejects.toStrictEqual( await expect(deletePost(beta, true, betaPost2.post)).rejects.toStrictEqual(
@ -372,7 +383,7 @@ test("Remove a post from admin and community on different instance", async () =>
// Make sure lemmy beta sees post is undeleted // Make sure lemmy beta sees post is undeleted
let betaPost2 = (await resolvePost(beta, postRes.post_view.post)).post; let betaPost2 = (await resolvePost(beta, postRes.post_view.post)).post;
expect(betaPost2?.post.removed).toBe(false); expect(betaPost2?.post.removed).toBe(false);
assertPostFederation(betaPost2, undeletedPost.post_view); await assertPostFederation(betaPost2!, undeletedPost.post_view);
}); });
test("Remove a post from admin and community on same instance", async () => { test("Remove a post from admin and community on same instance", async () => {
@ -403,7 +414,7 @@ test("Remove a post from admin and community on same instance", async () => {
p => p?.post_view.post.removed ?? false, p => p?.post_view.post.removed ?? false,
); );
expect(alphaPost?.post_view.post.removed).toBe(true); expect(alphaPost?.post_view.post.removed).toBe(true);
assertPostFederation(alphaPost.post_view, removePostRes.post_view); await assertPostFederation(alphaPost.post_view, removePostRes.post_view);
// Undelete // Undelete
let undeletedPost = await removePost(beta, false, betaPost.post); let undeletedPost = await removePost(beta, false, betaPost.post);
@ -416,7 +427,7 @@ test("Remove a post from admin and community on same instance", async () => {
p => !!p && !p.post.removed, p => !!p && !p.post.removed,
); );
expect(alphaPost2.post.removed).toBe(false); expect(alphaPost2.post.removed).toBe(false);
assertPostFederation(alphaPost2, undeletedPost.post_view); await assertPostFederation(alphaPost2, undeletedPost.post_view);
await unfollowRemotes(alpha); await unfollowRemotes(alpha);
}); });

View file

@ -1,17 +1,24 @@
use crate::{ use crate::{
context::LemmyContext, context::LemmyContext,
lemmy_db_schema::traits::Crud,
post::{LinkMetadata, OpenGraphData}, post::{LinkMetadata, OpenGraphData},
utils::proxy_image_link, send_activity::{ActivityChannel, SendActivityData},
utils::{local_site_opt_to_sensitive, proxy_image_link, proxy_image_link_opt_apub},
}; };
use activitypub_federation::config::Data; use activitypub_federation::config::Data;
use encoding::{all::encodings, DecoderTrap}; use encoding::{all::encodings, DecoderTrap};
use lemmy_db_schema::{ use lemmy_db_schema::{
newtypes::DbUrl, newtypes::DbUrl,
source::images::{LocalImage, LocalImageForm}, source::{
images::{LocalImage, LocalImageForm},
local_site::LocalSite,
post::{Post, PostUpdateForm},
},
}; };
use lemmy_utils::{ use lemmy_utils::{
error::{LemmyError, LemmyErrorType}, error::{LemmyError, LemmyErrorType},
settings::structs::{PictrsImageMode, Settings}, settings::structs::{PictrsImageMode, Settings},
spawn_try_task,
version::VERSION, version::VERSION,
REQWEST_TIMEOUT, REQWEST_TIMEOUT,
}; };
@ -83,6 +90,50 @@ pub async fn fetch_link_metadata_opt(
_ => Default::default(), _ => Default::default(),
} }
} }
/// Fetches link metadata (and, where permitted, a thumbnail) for `post` inside a task started
/// with `spawn_try_task`, so the caller does not wait on slow remote sites.
///
/// Once the metadata has been written to the post, `send_activity` is called with the updated
/// post. If it returns `Some`, that activity is submitted, which lets the post be federated
/// together with its metadata. Returning `None` skips the submission.
///
/// * `post` - the already stored post whose `url`, `thumbnail_url`, `nsfw` and `id` are read.
/// * `custom_thumbnail` - thumbnail supplied by the caller; preferred over `post.thumbnail_url`.
/// * `send_activity` - callback producing the activity to submit for the updated post, if any.
/// * `local_site` - passed to `local_site_opt_to_sensitive` to decide whether thumbnails may be
///   generated for NSFW posts.
/// * `context` - used for the metadata fetch, image proxying, the database update and the
///   activity submission.
pub fn generate_post_link_metadata(
  post: Post,
  custom_thumbnail: Option<Url>,
  send_activity: impl FnOnce(Post) -> Option<SendActivityData> + Send + 'static,
  local_site: Option<LocalSite>,
  context: Data<LemmyContext>,
) {
  spawn_try_task(async move {
    // Thumbnails may be generated when the site allows sensitive content or the post is not NSFW.
    let may_generate = local_site_opt_to_sensitive(&local_site) || !post.nsfw;

    // A caller-provided thumbnail wins over whatever is already stored on the post.
    let known_thumbnail: Option<Url> = match custom_thumbnail {
      Some(custom) => Some(custom),
      None => post.thumbnail_url.map(Into::into),
    };

    // Generate local thumbnail only if no thumbnail is known yet and 'sensitive' attributes
    // allow it.
    let should_generate = known_thumbnail.is_none() && may_generate;
    let link: Option<Url> = post.url.map(Into::into);
    let metadata = fetch_link_metadata_opt(link.as_ref(), should_generate, &context).await;

    // A thumbnail returned by the metadata fetch replaces the previously known one.
    let chosen_thumbnail: Option<Url> = match metadata.thumbnail {
      Some(fetched) => Some(fetched.into()),
      None => known_thumbnail,
    };
    let proxied_thumbnail = proxy_image_link_opt_apub(chosen_thumbnail, &context).await?;

    // Every metadata column is wrapped in `Some(..)`, so the update form carries an explicit
    // value for each of them, including the empty ones.
    let opengraph = metadata.opengraph_data;
    let form = PostUpdateForm {
      url_content_type: Some(metadata.content_type),
      thumbnail_url: Some(proxied_thumbnail),
      embed_video_url: Some(opengraph.embed_video_url),
      embed_description: Some(opengraph.description),
      embed_title: Some(opengraph.title),
      ..Default::default()
    };
    let updated_post = Post::update(&mut context.pool(), post.id, &form).await?;

    // Submit the activity only after the metadata has been stored on the post.
    if let Some(activity) = send_activity(updated_post) {
      ActivityChannel::submit_activity(activity, &context).await?;
    }
    Ok(())
  });
}
/// Extract site metadata from HTML Opengraph attributes. /// Extract site metadata from HTML Opengraph attributes.
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<OpenGraphData, LemmyError> { fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<OpenGraphData, LemmyError> {

View file

@ -4,8 +4,8 @@ use lemmy_api_common::{
build_response::build_post_response, build_response::build_post_response,
context::LemmyContext, context::LemmyContext,
post::{CreatePost, PostResponse}, post::{CreatePost, PostResponse},
request::fetch_link_metadata_opt, request::generate_post_link_metadata,
send_activity::{ActivityChannel, SendActivityData}, send_activity::SendActivityData,
utils::{ utils::{
check_community_user_action, check_community_user_action,
generate_local_apub_endpoint, generate_local_apub_endpoint,
@ -75,6 +75,7 @@ pub async fn create_post(
is_url_blocked(&url, &url_blocklist)?; is_url_blocked(&url, &url_blocklist)?;
check_url_scheme(&url)?; check_url_scheme(&url)?;
check_url_scheme(&custom_thumbnail)?; check_url_scheme(&custom_thumbnail)?;
let url = proxy_image_link_opt_apub(url, &context).await?;
check_community_user_action( check_community_user_action(
&local_user_view.person, &local_user_view.person,
@ -98,18 +99,6 @@ pub async fn create_post(
} }
} }
// Only generate the thumbnail if there's no custom thumbnail provided,
// otherwise it will save it in pictrs
let generate_thumbnail = custom_thumbnail.is_none();
// Fetch post links and pictrs cached image
let metadata = fetch_link_metadata_opt(url.as_ref(), generate_thumbnail, &context).await;
let url = proxy_image_link_opt_apub(url, &context).await?;
let thumbnail_url = proxy_image_link_opt_apub(custom_thumbnail, &context)
.await?
.map(Into::into)
.or(metadata.thumbnail);
// Only need to check if language is allowed in case user set it explicitly. When using default // Only need to check if language is allowed in case user set it explicitly. When using default
// language, it already only returns allowed languages. // language, it already only returns allowed languages.
CommunityLanguage::is_allowed_community_language( CommunityLanguage::is_allowed_community_language(
@ -134,18 +123,13 @@ pub async fn create_post(
let post_form = PostInsertForm::builder() let post_form = PostInsertForm::builder()
.name(data.name.trim().to_string()) .name(data.name.trim().to_string())
.url_content_type(metadata.content_type)
.url(url) .url(url)
.body(body) .body(body)
.alt_text(data.alt_text.clone()) .alt_text(data.alt_text.clone())
.community_id(data.community_id) .community_id(data.community_id)
.creator_id(local_user_view.person.id) .creator_id(local_user_view.person.id)
.nsfw(data.nsfw) .nsfw(data.nsfw)
.embed_title(metadata.opengraph_data.title)
.embed_description(metadata.opengraph_data.description)
.embed_video_url(metadata.opengraph_data.embed_video_url)
.language_id(language_id) .language_id(language_id)
.thumbnail_url(thumbnail_url)
.build(); .build();
let inserted_post = Post::create(&mut context.pool(), &post_form) let inserted_post = Post::create(&mut context.pool(), &post_form)
@ -170,6 +154,14 @@ pub async fn create_post(
.await .await
.with_lemmy_type(LemmyErrorType::CouldntCreatePost)?; .with_lemmy_type(LemmyErrorType::CouldntCreatePost)?;
generate_post_link_metadata(
updated_post.clone(),
custom_thumbnail,
|post| Some(SendActivityData::CreatePost(post)),
Some(local_site),
context.reset_request_count(),
);
// They like their own post by default // They like their own post by default
let person_id = local_user_view.person.id; let person_id = local_user_view.person.id;
let post_id = inserted_post.id; let post_id = inserted_post.id;
@ -183,9 +175,6 @@ pub async fn create_post(
.await .await
.with_lemmy_type(LemmyErrorType::CouldntLikePost)?; .with_lemmy_type(LemmyErrorType::CouldntLikePost)?;
ActivityChannel::submit_activity(SendActivityData::CreatePost(updated_post.clone()), &context)
.await?;
// Mark the post as read // Mark the post as read
mark_post_as_read(person_id, post_id, &mut context.pool()).await?; mark_post_as_read(person_id, post_id, &mut context.pool()).await?;

View file

@ -4,8 +4,8 @@ use lemmy_api_common::{
build_response::build_post_response, build_response::build_post_response,
context::LemmyContext, context::LemmyContext,
post::{EditPost, PostResponse}, post::{EditPost, PostResponse},
request::fetch_link_metadata, request::generate_post_link_metadata,
send_activity::{ActivityChannel, SendActivityData}, send_activity::SendActivityData,
utils::{ utils::{
check_community_user_action, check_community_user_action,
get_url_blocklist, get_url_blocklist,
@ -84,40 +84,11 @@ pub async fn update_post(
Err(LemmyErrorType::NoPostEditAllowed)? Err(LemmyErrorType::NoPostEditAllowed)?
} }
// Fetch post links and thumbnail if url was updated
let (embed_title, embed_description, embed_video_url, metadata_thumbnail, metadata_content_type) =
match &url {
Some(url) => {
// Only generate the thumbnail if there's no custom thumbnail provided,
// otherwise it will save it in pictrs
let generate_thumbnail = custom_thumbnail.is_none() || orig_post.thumbnail_url.is_none();
let metadata = fetch_link_metadata(url, generate_thumbnail, &context).await?;
(
Some(metadata.opengraph_data.title),
Some(metadata.opengraph_data.description),
Some(metadata.opengraph_data.embed_video_url),
Some(metadata.thumbnail),
Some(metadata.content_type),
)
}
_ => Default::default(),
};
let url = match url { let url = match url {
Some(url) => Some(proxy_image_link_opt_apub(Some(url), &context).await?), Some(url) => Some(proxy_image_link_opt_apub(Some(url), &context).await?),
_ => Default::default(), _ => Default::default(),
}; };
let custom_thumbnail = match custom_thumbnail {
Some(custom_thumbnail) => {
Some(proxy_image_link_opt_apub(Some(custom_thumbnail), &context).await?)
}
_ => Default::default(),
};
let thumbnail_url = custom_thumbnail.or(metadata_thumbnail);
let language_id = data.language_id; let language_id = data.language_id;
CommunityLanguage::is_allowed_community_language( CommunityLanguage::is_allowed_community_language(
&mut context.pool(), &mut context.pool(),
@ -129,15 +100,10 @@ pub async fn update_post(
let post_form = PostUpdateForm { let post_form = PostUpdateForm {
name: data.name.clone(), name: data.name.clone(),
url, url,
url_content_type: metadata_content_type,
body: diesel_option_overwrite(body), body: diesel_option_overwrite(body),
alt_text: diesel_option_overwrite(data.alt_text.clone()), alt_text: diesel_option_overwrite(data.alt_text.clone()),
nsfw: data.nsfw, nsfw: data.nsfw,
embed_title,
embed_description,
embed_video_url,
language_id: data.language_id, language_id: data.language_id,
thumbnail_url,
updated: Some(Some(naive_now())), updated: Some(Some(naive_now())),
..Default::default() ..Default::default()
}; };
@ -147,7 +113,13 @@ pub async fn update_post(
.await .await
.with_lemmy_type(LemmyErrorType::CouldntUpdatePost)?; .with_lemmy_type(LemmyErrorType::CouldntUpdatePost)?;
ActivityChannel::submit_activity(SendActivityData::UpdatePost(updated_post), &context).await?; generate_post_link_metadata(
updated_post.clone(),
custom_thumbnail,
|post| Some(SendActivityData::UpdatePost(post)),
Some(local_site),
context.reset_request_count(),
);
build_post_response( build_post_response(
context.deref(), context.deref(),

View file

@ -24,10 +24,9 @@ use chrono::{DateTime, Utc};
use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator}; use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator};
use lemmy_api_common::{ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
request::fetch_link_metadata_opt, request::generate_post_link_metadata,
utils::{ utils::{
get_url_blocklist, get_url_blocklist,
local_site_opt_to_sensitive,
local_site_opt_to_slur_regex, local_site_opt_to_slur_regex,
process_markdown_opt, process_markdown_opt,
proxy_image_link_opt_apub, proxy_image_link_opt_apub,
@ -218,6 +217,7 @@ impl Object for ApubPost {
let old_post = page.id.dereference_local(context).await; let old_post = page.id.dereference_local(context).await;
let first_attachment = page.attachment.first(); let first_attachment = page.attachment.first();
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let form = if !page.is_mod_action(context).await? { let form = if !page.is_mod_action(context).await? {
let url = if let Some(attachment) = first_attachment.cloned() { let url = if let Some(attachment) = first_attachment.cloned() {
@ -231,20 +231,8 @@ impl Object for ApubPost {
check_url_scheme(&url)?; check_url_scheme(&url)?;
let alt_text = first_attachment.cloned().and_then(Attachment::alt_text); let alt_text = first_attachment.cloned().and_then(Attachment::alt_text);
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let allow_sensitive = local_site_opt_to_sensitive(&local_site);
let page_is_sensitive = page.sensitive.unwrap_or(false);
let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive;
let mut thumbnail_url = page.image.map(|i| i.url);
let do_generate_thumbnail = thumbnail_url.is_none() && allow_generate_thumbnail;
// Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it.
let metadata = fetch_link_metadata_opt(url.as_ref(), do_generate_thumbnail, context).await;
if let Some(thumbnail_url_) = metadata.thumbnail {
thumbnail_url = Some(thumbnail_url_.into());
}
let url = proxy_image_link_opt_apub(url, context).await?; let url = proxy_image_link_opt_apub(url, context).await?;
let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site); let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?; let url_blocklist = get_url_blocklist(context).await?;
@ -254,30 +242,22 @@ impl Object for ApubPost {
let language_id = let language_id =
LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?;
PostInsertForm { PostInsertForm::builder()
name, .name(name)
url: url.map(Into::into), .url(url.map(Into::into))
body, .body(body)
alt_text, .alt_text(alt_text)
creator_id: creator.id, .creator_id(creator.id)
community_id: community.id, .community_id(community.id)
removed: None, .locked(page.comments_enabled.map(|e| !e))
locked: page.comments_enabled.map(|e| !e), .published(page.published.map(Into::into))
published: page.published.map(Into::into), .updated(page.updated.map(Into::into))
updated: page.updated.map(Into::into), .deleted(Some(false))
deleted: Some(false), .nsfw(page.sensitive)
nsfw: page.sensitive, .ap_id(Some(page.id.clone().into()))
embed_title: metadata.opengraph_data.title, .local(Some(false))
embed_description: metadata.opengraph_data.description, .language_id(language_id)
embed_video_url: metadata.opengraph_data.embed_video_url, .build()
thumbnail_url,
ap_id: Some(page.id.clone().into()),
local: Some(false),
language_id,
featured_community: None,
featured_local: None,
url_content_type: metadata.content_type,
}
} else { } else {
// if is mod action, only update locked/stickied fields, nothing else // if is mod action, only update locked/stickied fields, nothing else
PostInsertForm::builder() PostInsertForm::builder()
@ -292,6 +272,14 @@ impl Object for ApubPost {
let post = Post::create(&mut context.pool(), &form).await?; let post = Post::create(&mut context.pool(), &form).await?;
generate_post_link_metadata(
post.clone(),
page.image.map(|i| i.url),
|_| None,
local_site,
context.reset_request_count(),
);
// write mod log entry for lock // write mod log entry for lock
if Page::is_locked_changed(&old_post, &page.comments_enabled) { if Page::is_locked_changed(&old_post, &page.comments_enabled) {
let form = ModLockPostForm { let form = ModLockPostForm {