Adding a new config flag to disable pictrs caching for thumbnails (#3897)

* add logic to prevent downloading remote pictrs images

* apply formatting

* Do not attempt a pictrs fetch if the remote image is also on a pictrs instance

* Do not attempt a pictrs fetch if the remote image is also on a pictrs instance and cache_federated_images is false

* Generalising the no caching option to handle all remote images

* rustfmt

* Return None if the URL is not an image

* Updating defaults.hjson

* fixing typo

* Fixing typo

* Skip cloning the Url unless we need to

* Using a HEAD request to check the content type, saving bandwidth and improving performance

* Removing early returns

* Switching back to GET requests for Content-Type because pictrs does not handle HEAD requests

* Simplifying logic and using metadata_image instead of url if we do not get a pictrs thumbnail

* Removing unused import

* Return None as a thumbnail if caching is disabled

* formatting

---------

Co-authored-by: Djones4822 <david.jones4822@gmail.com>
This commit is contained in:
Nick Webster 2023-09-01 02:36:39 +12:00 committed by GitHub
parent 15930cbf4d
commit a57658d99c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 62 additions and 53 deletions

View file

@ -43,6 +43,8 @@
url: "http://localhost:8080/" url: "http://localhost:8080/"
# Set a custom pictrs API key. ( Required for deleting images ) # Set a custom pictrs API key. ( Required for deleting images )
api_key: "string" api_key: "string"
# Cache remote images
cache_remote_images: true
} }
# Email sending configuration. All options except login/password are mandatory # Email sending configuration. All options except login/password are mandatory
email: { email: {

View file

@ -123,24 +123,29 @@ pub(crate) async fn fetch_pictrs(
let pictrs_config = settings.pictrs_config()?; let pictrs_config = settings.pictrs_config()?;
is_image_content_type(client, image_url).await?; is_image_content_type(client, image_url).await?;
let fetch_url = format!( if pictrs_config.cache_remote_images {
"{}image/download?url={}", // fetch remote non-pictrs images for persistent thumbnail link
pictrs_config.url, let fetch_url = format!(
utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed "{}image/download?url={}",
); pictrs_config.url,
utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed
);
let response = client let response = client
.get(&fetch_url) .get(&fetch_url)
.timeout(REQWEST_TIMEOUT) .timeout(REQWEST_TIMEOUT)
.send() .send()
.await?; .await?;
let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?; let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?;
if response.msg == "ok" { if response.msg == "ok" {
Ok(response) Ok(response)
} else {
Err(LemmyErrorType::PictrsResponseError(response.msg))?
}
} else { } else {
Err(LemmyErrorType::PictrsResponseError(response.msg))? Err(LemmyErrorType::PictrsCachingDisabled)?
} }
} }
@ -185,7 +190,7 @@ pub async fn purge_image_from_pictrs(
} }
/// Both are options, since the URL might be either an html page, or an image /// Both are options, since the URL might be either an html page, or an image
/// Returns the SiteMetadata, and a Pictrs URL, if there is a picture associated /// Returns the SiteMetadata, and an image URL, if there is a picture associated
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn fetch_site_data( pub async fn fetch_site_data(
client: &ClientWithMiddleware, client: &ClientWithMiddleware,
@ -200,50 +205,46 @@ pub async fn fetch_site_data(
// Warning, this may ignore SSL errors // Warning, this may ignore SSL errors
let metadata_option = fetch_site_metadata(client, url).await.ok(); let metadata_option = fetch_site_metadata(client, url).await.ok();
if !include_image { if !include_image {
return (metadata_option, None); (metadata_option, None)
} else {
let thumbnail_url =
fetch_pictrs_url_from_site_metadata(client, &metadata_option, settings, url)
.await
.ok();
(metadata_option, thumbnail_url)
} }
let missing_pictrs_file =
|r: PictrsResponse| r.files.first().expect("missing pictrs file").file.clone();
// Fetch pictrs thumbnail
let pictrs_hash = match &metadata_option {
Some(metadata_res) => match &metadata_res.image {
// Metadata, with image
// Try to generate a small thumbnail if there's a full sized one from post-links
Some(metadata_image) => fetch_pictrs(client, settings, metadata_image)
.await
.map(missing_pictrs_file),
// Metadata, but no image
None => fetch_pictrs(client, settings, url)
.await
.map(missing_pictrs_file),
},
// No metadata, try to fetch the URL as an image
None => fetch_pictrs(client, settings, url)
.await
.map(missing_pictrs_file),
};
// The full urls are necessary for federation
let pictrs_thumbnail = pictrs_hash
.map(|p| {
Url::parse(&format!(
"{}/pictrs/image/{}",
settings.get_protocol_and_hostname(),
p
))
.ok()
})
.ok()
.flatten();
(metadata_option, pictrs_thumbnail.map(Into::into))
} }
None => (None, None), None => (None, None),
} }
} }
async fn fetch_pictrs_url_from_site_metadata(
client: &ClientWithMiddleware,
metadata_option: &Option<SiteMetadata>,
settings: &Settings,
url: &Url,
) -> Result<DbUrl, LemmyError> {
let pictrs_res = match metadata_option {
Some(metadata_res) => match &metadata_res.image {
// Metadata, with image
// Try to generate a small thumbnail if there's a full sized one from post-links
Some(metadata_image) => fetch_pictrs(client, settings, metadata_image).await,
// Metadata, but no image
None => fetch_pictrs(client, settings, url).await,
},
// No metadata, try to fetch the URL as an image
None => fetch_pictrs(client, settings, url).await,
}?;
Url::parse(&format!(
"{}/pictrs/image/{}",
settings.get_protocol_and_hostname(),
pictrs_res.files.first().expect("missing pictrs file").file
))
.map(Into::into)
.map_err(Into::into)
}
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> { async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> {
let response = client.get(url.as_str()).send().await?; let response = client.get(url.as_str()).send().await?;

View file

@ -87,6 +87,7 @@ pub enum LemmyErrorType {
SiteMetadataPageIsNotDoctypeHtml, SiteMetadataPageIsNotDoctypeHtml,
PictrsResponseError(String), PictrsResponseError(String),
PictrsPurgeResponseError(String), PictrsPurgeResponseError(String),
PictrsCachingDisabled,
ImageUrlMissingPathSegments, ImageUrlMissingPathSegments,
ImageUrlMissingLastPathSegment, ImageUrlMissingLastPathSegment,
PictrsApiKeyNotProvided, PictrsApiKeyNotProvided,

View file

@ -62,6 +62,10 @@ pub struct PictrsConfig {
/// Set a custom pictrs API key. ( Required for deleting images ) /// Set a custom pictrs API key. ( Required for deleting images )
#[default(None)] #[default(None)]
pub api_key: Option<String>, pub api_key: Option<String>,
/// Cache remote images
#[default(true)]
pub cache_remote_images: bool,
} }
#[derive(Debug, Deserialize, Serialize, Clone, SmartDefault, Document)] #[derive(Debug, Deserialize, Serialize, Clone, SmartDefault, Document)]

View file

@ -21,6 +21,7 @@
pictrs: { pictrs: {
url: "http://pictrs:8080/" url: "http://pictrs:8080/"
# api_key: "API_KEY" # api_key: "API_KEY"
cache_remote_images: true
} }
#opentelemetry_url: "http://otel:4137" #opentelemetry_url: "http://otel:4137"