Make fish_xgettext sorta work with rust

This is absolutely disgusting code, but it works out okay-ish.

The problem is that xgettext has no Rust support (it's stuck in review
limbo). So we use cargo-expand to extract all invocations of
gettext, and massage that output to generate a
messages.pot ourselves.

We also assume any string constant could be translated.
This commit is contained in:
Fabian Boehm 2024-03-05 17:24:17 +01:00
parent 97e7e730e1
commit d91ad2976c

View file

@ -1,18 +1,40 @@
#!/usr/bin/env fish
#
# Tool to generate messages.pot
# Extended to replace the old Makefile rule which did not port easily to CMake
# This script was originally motivated to work around a quirk (or bug depending on your viewpoint)
# of the xgettext command. See https://lists.gnu.org/archive/html/bug-gettext/2014-11/msg00006.html.
# However, it turns out that even if that quirk did not exist we would still need something like
# this script to properly extract descriptions. That's because we need to normalize the strings to
# a format that xgettext will handle correctly. Also, `xgettext -LShell` doesn't correctly extract
# all the strings we want translated. So we extract and normalize all such strings into a format
# that `xgettext` can handle.
# Create temporary directory for these operations. OS X `mktemp` is somewhat restricted, so this block
# works around that - based on share/functions/funced.fish.
# Use the caller's TMPDIR when it is set; otherwise define a script-local fallback of /tmp.
set -q TMPDIR
or set -l TMPDIR /tmp
# An explicit template is passed to `mktemp -d`. If the directory cannot be created the script
# exits with status 1, since the later steps write their intermediate files below $tmpdir.
set -l tmpdir (mktemp -d $TMPDIR/fish.XXXXXX)
or exit 1
# Start with the C++ source
# `-k` without an argument disables xgettext's default keywords, so only calls to `_` and `N_`
# (enabled by `-k_` and `-kN_`) are extracted. `--no-wrap` keeps long messages on one line.
# NOTE(review): a later loop in this script redirects its output with `>messages.pot`, which
# truncates the file written here - confirm that this step is still wanted.
xgettext -k -k_ -kN_ -LC++ --no-wrap -o messages.pot src/*.cpp src/*.h
# This is a gigantic crime.
# xgettext still does not support rust *at all*, so we use cargo-expand to get all our wgettext invocations.
# $expanded receives the output of `cargo expand` for the library followed by that for the
# fish, fish_indent and fish_key_reader binaries, one list element per output line.
set -l expanded (cargo expand --lib; for f in fish{,_indent,_key_reader}; cargo expand --bin $f; end)
# Extract any gettext call
# Stage by stage:
#  - keep every line mentioning wgettext_static_str together with the line after it (`grep -A1`),
#  - of those, keep only lines containing `widestring::internals::core::primitive::str =`,
#  - capture the text between the first and the last double quote on the line,
#  - drop messages that are empty or consist solely of `%ls`,
#  - turn backslash-escaped single quotes back into plain ones, then sort and de-duplicate.
set -l strs (printf '%s\n' $expanded | grep -A1 wgettext_static_str |
grep 'widestring::internals::core::primitive::str =' |
string match -rg '"(.*)"' | string match -rv '^%ls$|^$' |
# escaping difference between gettext and cargo-expand: single-quotes
string replace -a "\'" "'" | sort -u)
# Extract any constants
# Lines mentioning `BUILD_VERSION:` or `PACKAGE_NAME` are skipped. From the remaining lines the
# value of every `const NAME: &str = "..."` (NAME made of capitals and underscores) is appended
# to $strs, with the same single-quote fix-up as above.
# NOTE(review): these values are appended after the `sort -u` above, so $strs may still contain
# duplicates at this point - confirm whether that matters for the generated catalog.
set -a strs (string match -rv 'BUILD_VERSION:|PACKAGE_NAME' -- $expanded |
string match -rg 'const [A-Z_]*: &str = "(.*)"' | string replace -a "\'" "'")
# We construct messages.pot ourselves instead of forcing this into msgmerge or whatever.
# The escaping so far works out okay.
# Every element of $strs becomes one entry: a `#:` reference line, the msgid and an empty msgstr.
for str in $strs
# grep -P needed for string escape to be compatible (PCRE-style),
# -H gives the filename, -n the line number.
# If you want to run this on non-GNU grep: Don't.
# Only the first hit below src/ is used. The `string replace` strips everything from the first
# colon that is followed by whitespace; for an indented source line that leaves `file:line`.
echo "#:" (grep -PHn -r -- (string escape --style=regex -- $str) src/ |
head -n1 | string replace -r ':\s.*' '')
echo "msgid \"$str\""
echo 'msgstr ""'
# The redirection applies to the loop as a whole, so messages.pot is created (or truncated)
# once and receives the output of every iteration.
end >messages.pot
# This regex handles descriptions for `complete` and `function` statements. These messages are not
# particularly important to translate. Hence the "implicit" label.
@ -22,39 +44,22 @@ set -l implicit_regex '(?:^| +)(?:complete|function).*? (?:-d|--description) (([
# than messages which should be implicitly translated.
# The pattern matches a whole line containing `( _ "message" )`, with single or double quotes and
# optional spaces just inside the parentheses. Group 1 is the quoted message including its
# quotes, group 2 the opening quote character; the closing quote must be the same character and
# must not be preceded by a backslash.
set -l explicit_regex '.*\( *_ (([\'"]).+?(?<!\\\\)\\2) *\).*'
# Create temporary directory for these operations. OS X `mktemp` is somewhat restricted, so this block
# works around that - based on share/functions/funced.fish.
# NOTE(review): an identical block appears earlier in this script; one of the two is presumably
# redundant - confirm which one should stay.
# Use the caller's TMPDIR when it is set; otherwise define a script-local fallback of /tmp.
set -q TMPDIR
or set -l TMPDIR /tmp
# Exit with status 1 if the scratch directory cannot be created.
set -l tmpdir (mktemp -d $TMPDIR/fish.XXXXXX)
or exit 1
# Mirror the share/ layout under both scratch trees so that $tmpdir/<kind>/$f can be written
# directly for every $f handled by the loop below.
mkdir -p $tmpdir/implicit/share/completions $tmpdir/implicit/share/functions
mkdir -p $tmpdir/explicit/share/completions $tmpdir/explicit/share/functions
# For each fish script, write its translatable strings as `N_ "..."` lines into the explicit and
# implicit scratch trees.
# NOTE(review): both $tmpdir/explicit/$f and $tmpdir/implicit/$f are written twice below - first
# by an `echo $1 | fish` + `while read` sequence, then by a `string unescape` pipeline whose
# output replaces the first result. These look like two alternative implementations - confirm
# which one is intended.
for f in share/config.fish share/completions/*.fish share/functions/*.fish
# Extract explicit attempts to translate a message. That is, those that are of the form
# `(_ "message")`.
# Variant 1: rewrite each matching line to `echo <quoted message>` and feed it to a child fish,
# which prints the message without its quoting into a .tmp file; errors of the child are discarded.
string replace --filter --regex $explicit_regex 'echo $1' <$f | fish >$tmpdir/explicit/$f.tmp 2>/dev/null
# Wrap every line of the .tmp file as `N_ "..."`, escaping embedded double quotes.
while read description
echo 'N_ "'(string replace --all '"' '\\"' -- $description)'"'
end <$tmpdir/explicit/$f.tmp >$tmpdir/explicit/$f
rm $tmpdir/explicit/$f.tmp
# Variant 2: no child shell. Keep only the captured quoted string, pass it through
# `string unescape`, escape embedded double quotes and wrap each line as `N_ "..."`.
string replace --filter --regex $explicit_regex '$1' <$f | string unescape \
| string replace --all '"' '\\"' | string replace -r '(.*)' 'N_ "$1"' >$tmpdir/explicit/$f
# Handle `complete` / `function` description messages. The `| fish` is subtle. It basically
# avoids the need to use `source` with a command substitution that could affect the current
# shell.
# Variant 1 for the implicit messages, same shape as for the explicit ones above.
string replace --filter --regex $implicit_regex 'echo $1' <$f | fish >$tmpdir/implicit/$f.tmp 2>/dev/null
while read description
# We don't use `string escape` as shown in the next comment because it produces output that
# is not parsed correctly by xgettext. Instead just escape double-quotes and quote the
# resulting string.
echo 'N_ "'(string replace --all '"' '\\"' -- $description)'"'
end <$tmpdir/implicit/$f.tmp >$tmpdir/implicit/$f
rm $tmpdir/implicit/$f.tmp
# Variant 2 for the implicit messages, same shape as for the explicit ones above.
string replace --filter --regex $implicit_regex '$1' <$f | string unescape \
| string replace --all '"' '\\"' | string replace -r '(.*)' 'N_ "$1"' >$tmpdir/implicit/$f
end
# Merge the strings collected from the fish scripts into the existing messages.pot: `-j` joins
# with the file already present, `-k -kN_` makes `N_` the only recognised keyword, and
# `-cDescription` copies comments tagged "Description" into the output.
xgettext -j -k -kN_ -LShell --from-code=UTF-8 -cDescription --no-wrap -o messages.pot $tmpdir/explicit/share/*/*.fish
xgettext -j -k -kN_ -LShell --from-code=UTF-8 -cDescription --no-wrap -o messages.pot $tmpdir/implicit/share/*/*.fish
# Single-invocation form: the brace expansion passes the explicit and the implicit tree together.
# NOTE(review): this covers the same files as the two calls above - confirm that only one form
# is meant to run.
xgettext -j -k -kN_ -LShell --from-code=UTF-8 -cDescription --no-wrap -o messages.pot $tmpdir/{ex,im}plicit/share/*/*.fish
# Remove the scratch directory and everything generated in it.
rm -r $tmpdir