From d91ad2976c1735654fa881b0a44dc625d23c1d25 Mon Sep 17 00:00:00 2001 From: Fabian Boehm Date: Tue, 5 Mar 2024 17:24:17 +0100 Subject: [PATCH] Make fish_xgettext sorta work with rust This is absolutely disgusting code, but it works out okay-ish. The problem is xgettext has no rust support (it's stuck in review limbo). So we use cargo-expand to extract all invocations of gettext, and massage all that to generate a messages.pot ourselves. We also assume any string constant could be translated. --- build_tools/fish_xgettext.fish | 69 ++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 32 deletions(-) diff --git a/build_tools/fish_xgettext.fish b/build_tools/fish_xgettext.fish index 6977d7c3f..77b8e9d17 100755 --- a/build_tools/fish_xgettext.fish +++ b/build_tools/fish_xgettext.fish @@ -1,18 +1,40 @@ #!/usr/bin/env fish # # Tool to generate messages.pot -# Extended to replace the old Makefile rule which did not port easily to CMake -# This script was originally motivated to work around a quirk (or bug depending on your viewpoint) -# of the xgettext command. See https://lists.gnu.org/archive/html/bug-gettext/2014-11/msg00006.html. -# However, it turns out that even if that quirk did not exist we would still need something like -# this script to properly extract descriptions. That's because we need to normalize the strings to -# a format that xgettext will handle correctly. Also, `xgettext -LShell` doesn't correctly extract -# all the strings we want translated. So we extract and normalize all such strings into a format -# that `xgettext` can handle. +# Create temporary directory for these operations. OS X `mktemp` is somewhat restricted, so this block +# works around that - based on share/functions/funced.fish. +set -q TMPDIR +or set -l TMPDIR /tmp +set -l tmpdir (mktemp -d $TMPDIR/fish.XXXXXX) +or exit 1 -# Start with the C++ source -xgettext -k -k_ -kN_ -LC++ --no-wrap -o messages.pot src/*.cpp src/*.h +# This is a gigantic crime. +# xgettext still does not support rust *at all*, so we use cargo-expand to get all our wgettext invocations. +set -l expanded (cargo expand --lib; for f in fish{,_indent,_key_reader}; cargo expand --bin $f; end) + +# Extract any gettext call +set -l strs (printf '%s\n' $expanded | grep -A1 wgettext_static_str | + grep 'widestring::internals::core::primitive::str =' | + string match -rg '"(.*)"' | string match -rv '^%ls$|^$' | + # escaping difference between gettext and cargo-expand: single-quotes + string replace -a "\'" "'" | sort -u) + +# Extract any constants +set -a strs (string match -rv 'BUILD_VERSION:|PACKAGE_NAME' -- $expanded | + string match -rg 'const [A-Z_]*: &str = "(.*)"' | string replace -a "\'" "'") + +# We construct messages.pot ourselves instead of forcing this into msgmerge or whatever. +# The escaping so far works out okay. +for str in $strs + # grep -P needed for string escape to be compatible (PCRE-style), + # -H gives the filename, -n the line number. + # If you want to run this on non-GNU grep: Don't. + echo "#:" (grep -PHn -r -- (string escape --style=regex -- $str) src/ | + head -n1 | string replace -r ':\s.*' '') + echo "msgid \"$str\"" + echo 'msgstr ""' +end >messages.pot # This regex handles descriptions for `complete` and `function` statements. These messages are not # particularly important to translate. Hence the "implicit" label. @@ -22,39 +44,22 @@ set -l implicit_regex '(?:^| +)(?:complete|function).*? (?:-d|--description) (([ # than messages which should be implicitly translated. set -l explicit_regex '.*\( *_ (([\'"]).+?(?$tmpdir/explicit/$f.tmp 2>/dev/null - while read description - echo 'N_ "'(string replace --all '"' '\\"' -- $description)'"' - end <$tmpdir/explicit/$f.tmp >$tmpdir/explicit/$f - rm $tmpdir/explicit/$f.tmp + string replace --filter --regex $explicit_regex '$1' <$f | string unescape \ + | string replace --all '"' '\\"' | string replace -r '(.*)' 'N_ "$1"' >$tmpdir/explicit/$f # Handle `complete` / `function` description messages. The `| fish` is subtle. It basically # avoids the need to use `source` with a command substitution that could affect the current # shell. - string replace --filter --regex $implicit_regex 'echo $1' <$f | fish >$tmpdir/implicit/$f.tmp 2>/dev/null - while read description - # We don't use `string escape` as shown in the next comment because it produces output that - # is not parsed correctly by xgettext. Instead just escape double-quotes and quote the - # resulting string. - echo 'N_ "'(string replace --all '"' '\\"' -- $description)'"' - end <$tmpdir/implicit/$f.tmp >$tmpdir/implicit/$f - rm $tmpdir/implicit/$f.tmp + string replace --filter --regex $implicit_regex '$1' <$f | string unescape \ + | string replace --all '"' '\\"' | string replace -r '(.*)' 'N_ "$1"' >$tmpdir/implicit/$f end -xgettext -j -k -kN_ -LShell --from-code=UTF-8 -cDescription --no-wrap -o messages.pot $tmpdir/explicit/share/*/*.fish -xgettext -j -k -kN_ -LShell --from-code=UTF-8 -cDescription --no-wrap -o messages.pot $tmpdir/implicit/share/*/*.fish +xgettext -j -k -kN_ -LShell --from-code=UTF-8 -cDescription --no-wrap -o messages.pot $tmpdir/{ex,im}plicit/share/*/*.fish rm -r $tmpdir