Port wgettext wrapper from wutil.cpp

This introduces string allocations on lookups of strings that are not
known at compile time; we should get rid of these allocations in the future.
This commit is contained in:
Johannes Altmanninger 2024-01-01 20:39:38 +01:00
parent cd19f00531
commit 3e3441429a
6 changed files with 106 additions and 23 deletions

View file

@ -74,6 +74,7 @@ corrosion_set_env_vars(${fish_rust_target}
"DATADIR=${CMAKE_INSTALL_FULL_DATADIR}"
"SYSCONFDIR=${CMAKE_INSTALL_FULL_SYSCONFDIR}"
"BINDIR=${CMAKE_INSTALL_FULL_BINDIR}"
"LOCALEDIR=${CMAKE_INSTALL_FULL_LOCALEDIR}"
)
# this needs an extra fish-rust due to the poor source placement

View file

@ -4,7 +4,7 @@ use std::error::Error;
use std::process::Stdio;
fn main() {
for key in ["DOCDIR", "DATADIR", "SYSCONFDIR", "BINDIR"] {
for key in ["DOCDIR", "DATADIR", "SYSCONFDIR", "BINDIR", "LOCALEDIR"] {
if let Ok(val) = env::var(key) {
// Forward some CMake config
println!("cargo:rustc-env={key}={val}");

View file

@ -1,4 +1,5 @@
use std::{
borrow::Cow,
cmp::Ordering,
collections::{BTreeMap, HashMap, HashSet},
mem,
@ -80,11 +81,11 @@ fn C_(s: &wstr) -> &'static wstr {
if s.is_empty() {
L!("")
} else {
wgettext_impl_do_not_use_directly(
wgettext_impl_do_not_use_directly(Cow::Owned(
U32CString::from_ustr(s)
.expect("translation string without NUL bytes")
.as_slice_with_nul(),
)
.into_vec_with_nul(),
))
}
}

View file

@ -40,8 +40,6 @@ include_cpp! {
generate_pod!("wcharz_t")
generate!("wcstring_list_ffi_t")
generate!("wgettext_ptr")
generate!("highlight_spec_t")
generate!("rgb_color_t")

View file

@ -68,7 +68,7 @@ use std::sync::Arc;
// FIXME: when the crate is actually called fish and not fish-rust, read this from cargo
// See: https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates
// for reference
const PACKAGE_NAME: &str = "fish"; // env!("CARGO_PKG_NAME");
pub const PACKAGE_NAME: &str = "fish"; // env!("CARGO_PKG_NAME");
// FIXME: the following should just use env!(), this is to make `cargo test` work without CMake for now
const DOC_DIR: &str = {

View file

@ -1,32 +1,115 @@
use crate::ffi;
use crate::wchar::wstr;
use crate::wchar_ffi::{wchar_t, wcslen};
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::ffi::CString;
use std::pin::Pin;
use std::sync::Mutex;
use crate::common::{charptr2wcstring, wcs2zstring};
use crate::fish::PACKAGE_NAME;
use crate::wchar::prelude::*;
use crate::wchar_ffi::wchar_t;
use errno::{errno, set_errno};
use once_cell::sync::{Lazy, OnceCell};
use widestring::U32CString;
/// Support for wgettext.
#[cfg(feature = "gettext")]
/// Bindings used when the `gettext` feature is enabled: each `fish_*` wrapper
/// forwards its `CStr` arguments to the C function of the same name declared
/// in the `extern "C"` block below.
mod internal {
    use libc::c_char;
    use std::ffi::CStr;

    extern "C" {
        fn gettext(msgid: *const c_char) -> *mut c_char;
        fn bindtextdomain(domainname: *const c_char, dirname: *const c_char) -> *mut c_char;
        fn textdomain(domainname: *const c_char) -> *mut c_char;
    }

    /// Call C `gettext` on `msgid` and return the pointer it yields.
    pub fn fish_gettext(msgid: &CStr) -> *const c_char {
        let msgid_ptr = msgid.as_ptr();
        // SAFETY: `msgid_ptr` comes from a `CStr`, so it is a valid
        // NUL-terminated string for the duration of the call.
        unsafe { gettext(msgid_ptr) }
    }

    /// Call C `bindtextdomain` with the given domain name and directory.
    pub fn fish_bindtextdomain(domainname: &CStr, dirname: &CStr) -> *mut c_char {
        let (domain_ptr, dir_ptr) = (domainname.as_ptr(), dirname.as_ptr());
        // SAFETY: both pointers come from `CStr`s that outlive the call.
        unsafe { bindtextdomain(domain_ptr, dir_ptr) }
    }

    /// Call C `textdomain` with the given domain name.
    pub fn fish_textdomain(domainname: &CStr) -> *mut c_char {
        let domain_ptr = domainname.as_ptr();
        // SAFETY: `domain_ptr` comes from a `CStr` that outlives the call.
        unsafe { textdomain(domain_ptr) }
    }
}
#[cfg(not(feature = "gettext"))]
/// Stand-ins used when the `gettext` feature is disabled. They keep the same
/// signatures as the real wrappers but never call into C.
mod internal {
    use libc::c_char;
    use std::ffi::CStr;
    use std::ptr;

    /// No translation available: hand back the pointer of `msgid` itself.
    /// The result is therefore only valid for as long as `msgid` is.
    pub fn fish_gettext(msgid: &CStr) -> *const c_char {
        let untranslated: *const c_char = msgid.as_ptr();
        untranslated
    }

    /// Does nothing; always returns a null pointer.
    pub fn fish_bindtextdomain(_domainname: &CStr, _dirname: &CStr) -> *mut c_char {
        ptr::null_mut()
    }

    /// Does nothing; always returns a null pointer.
    pub fn fish_textdomain(_domainname: &CStr) -> *mut c_char {
        ptr::null_mut()
    }
}
use internal::*;
/// Perform the actual wgettext setup: bind the `PACKAGE_NAME` text domain to
/// the locale directory and select that domain.
///
/// The directory is taken from the `LOCALEDIR` environment variable as seen
/// at compile time; when it was not set, the literal `"UNDEFINED"` is used.
/// The return values of both calls are ignored.
fn wgettext_really_init() {
    let locale_dir = option_env!("LOCALEDIR").unwrap_or("UNDEFINED");
    let domain = CString::new(PACKAGE_NAME).unwrap();
    let dirname = CString::new(locale_dir).unwrap();
    fish_bindtextdomain(&domain, &dirname);
    fish_textdomain(&domain);
}
/// Make sure `wgettext_really_init` has run. The `OnceCell` remembers that
/// initialization has happened, so subsequent calls return immediately.
fn wgettext_init_if_necessary() {
    static DOMAIN_READY: OnceCell<()> = OnceCell::new();
    let _: &() = DOMAIN_READY.get_or_init(wgettext_really_init);
}
/// Implementation detail for wgettext!.
// NOTE(review): this listing shows two signatures and two bodies for this function (the
// `&[wchar_t]` form that calls `ffi::wgettext_ptr`, and the `Cow` form that uses the cache
// below) - presumably the before/after of the change. Confirm which lines are current.
pub fn wgettext_impl_do_not_use_directly(text: &[wchar_t]) -> &'static wstr {
/// Wide character wrapper around the gettext function. For historic reasons, unlike the real
/// gettext function, wgettext takes care of setting the correct domain, etc. using the textdomain
/// and bindtextdomain functions. This should probably be moved out of wgettext, so that wgettext
/// will be nothing more than a wrapper around gettext, like all other functions in this file.
///
/// `text` must end with a 0 element. Each distinct `text` is looked up at most once; the
/// result is kept in a process-wide map and later calls return the stored string.
///
/// # Panics
/// Panics if `text` is not nul-terminated, if the map's mutex is poisoned, or if a new key is
/// rejected by `wstr::from_slice`.
pub fn wgettext_impl_do_not_use_directly(text: Cow<'static, [wchar_t]>) -> &'static wstr {
assert_eq!(text.last(), Some(&0), "should be nul-terminated");
let res: *const wchar_t = ffi::wgettext_ptr(text.as_ptr());
#[allow(clippy::unnecessary_cast)]
let slice = unsafe { std::slice::from_raw_parts(res as *const u32, wcslen(res)) };
wstr::from_slice(slice).expect("Invalid UTF-32")
// Preserve errno across this since this is often used in printing error messages.
let err = errno();
wgettext_init_if_necessary();
// Cache from input text to result. Each value lives in its own Box, so the address of the
// WString does not change when the HashMap grows; the raw pointers taken below rely on that.
#[allow(clippy::type_complexity)]
static WGETTEXT_MAP: Lazy<Mutex<HashMap<Cow<'static, [wchar_t]>, Pin<Box<WString>>>>> =
Lazy::new(|| Mutex::new(HashMap::new()));
let mut wmap = WGETTEXT_MAP.lock().unwrap();
let v = match wmap.entry(text) {
// Cache hit: reuse the string stored by an earlier call.
Entry::Occupied(v) => Pin::get_ref(Pin::as_ref(v.get())) as *const WString,
// Cache miss: run the key through fish_gettext and store the result.
Entry::Vacant(v) => {
let key = wstr::from_slice(v.key()).unwrap();
// presumably converts the wide key to a narrow C string - confirm in crate::common
let mbs_in = wcs2zstring(key);
let out = fish_gettext(&mbs_in);
// presumably copies the C string into an owned WString, so `out` no longer depends on
// `mbs_in` afterwards - confirm in crate::common
let out = charptr2wcstring(out);
let res = Pin::new(Box::new(out));
let value = v.insert(res);
Pin::get_ref(Pin::as_ref(value)) as *const WString
}
};
set_errno(err);
// The returned string is stored in the map.
// TODO: If we want to shrink the map, this would be a problem.
// SAFETY: `v` points at a boxed WString owned by the static WGETTEXT_MAP, and nothing in this
// function removes entries. The 'static lifetime holds only while that remains true.
unsafe { v.as_ref().unwrap() }.as_utfstr()
}
/// Get a (possibly translated) string from a non-literal.
///
/// `s` is first copied into a nul-terminated `U32CString`, so every call allocates even when
/// the text has been looked up before.
pub fn wgettext_str(s: &wstr) -> &'static wstr {
// NOTE(review): going by its name, `from_chars_truncate` cuts the input at the first interior
// NUL instead of failing - confirm against the widestring documentation.
let cstr: U32CString = U32CString::from_chars_truncate(s.as_char_slice());
wgettext_impl_do_not_use_directly(cstr.as_slice_with_nul())
// The owned, nul-terminated vector is handed over as the lookup key.
wgettext_impl_do_not_use_directly(Cow::Owned(cstr.into_vec_with_nul()))
}
/// Get a (possibly translated) string from a string literal.
/// This returns a &'static wstr.
///
/// The literal is passed through `u32cstr!` and its nul-terminated slice is handed to
/// `wgettext_impl_do_not_use_directly`; in the `Cow::Borrowed` form no copy of the literal is
/// made for the lookup key.
macro_rules! wgettext {
($string:expr) => {
crate::wutil::gettext::wgettext_impl_do_not_use_directly(
crate::wchar_ffi::u32cstr!($string).as_slice_with_nul(),
)
crate::wutil::gettext::wgettext_impl_do_not_use_directly(std::borrow::Cow::Borrowed(
widestring::u32cstr!($string).as_slice_with_nul(),
))
};
}
pub(crate) use wgettext;
@ -34,9 +117,9 @@ pub(crate) use wgettext;
/// Like wgettext, but for non-literals.
///
/// The argument is copied into a nul-terminated `U32CString` on every expansion; in the
/// `Cow::Owned` form that buffer is moved into the lookup key.
// NOTE(review): `from_ustr_truncate` presumably stops at the first interior NUL - confirm
// against the widestring documentation.
macro_rules! wgettext_expr {
($string:expr) => {
crate::wutil::gettext::wgettext_impl_do_not_use_directly(
widestring::U32CString::from_ustr_truncate($string).as_slice_with_nul(),
)
crate::wutil::gettext::wgettext_impl_do_not_use_directly(std::borrow::Cow::Owned(
widestring::U32CString::from_ustr_truncate($string).into_vec_with_nul(),
))
};
}
pub(crate) use wgettext_expr;