From e590a811eb831c946c15bea3d1c191e554afd071 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Thu, 21 Nov 2024 22:58:24 +0100 Subject: [PATCH] update --- crates/db_schema/src/detect_language.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/crates/db_schema/src/detect_language.rs b/crates/db_schema/src/detect_language.rs index 925f5b89e..9835a0e4d 100644 --- a/crates/db_schema/src/detect_language.rs +++ b/crates/db_schema/src/detect_language.rs @@ -1,9 +1,18 @@ use crate::{newtypes::LanguageId, source::language::Language, utils::DbPool}; use lemmy_utils::error::LemmyResult; -use lingua::{IsoCode639_1, Language as LinguaLanguage, LanguageDetectorBuilder}; +use lingua::{IsoCode639_1, Language as LinguaLang, LanguageDetectorBuilder}; pub async fn detect_language(input: &str, pool: &mut DbPool<'_>) -> LemmyResult<LanguageId> { // TODO: should only detect languages which are allowed in community + // TODO: cache conversion table Lingua to LanguageId and reverse (maybe load it directly from + // migration sql) + // TODO: instead of at post creation, could also run this as a background task + // TODO: probably uses a lot of ram/cpu, need to make it configurable: + // - analyze only local posts or all posts + // - low accuracy or high accuracy setting + // - min confidence value + // + // >>>> This should be a plugin! let detector = LanguageDetectorBuilder::from_iso_codes_639_1(&[ IsoCode639_1::EN, IsoCode639_1::ES, @@ -11,7 +20,7 @@ pub async fn detect_language(input: &str, pool: &mut DbPool<'_>) -> LemmyResult< ]) .build(); - let lang: Option<LinguaLanguage> = detector.detect_language_of(input); + let lang: Option<LinguaLang> = detector.detect_language_of(input); let Some(lang) = lang else { return Ok(LanguageId(0)); };