mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-27 05:13:10 +00:00
Introduce wcs2string_callback
This is like wcs2string, but instead of returning a std::string, it invokes a user-supplied function with each converted character. The idea is to allow interleaved conversion and output.
This commit is contained in:
parent
c9b42c6f1f
commit
a0cb23bea5
3 changed files with 52 additions and 29 deletions
|
@ -332,38 +332,13 @@ wcstring str2wcstring(const std::string &in, size_t len) {
|
|||
return str2wcs_internal(in.data(), len);
|
||||
}
|
||||
|
||||
/// This function is distinguished from wcs2str_internal in that it allows embedded null bytes.
|
||||
std::string wcs2string(const wcstring &input) {
|
||||
std::string result;
|
||||
result.reserve(input.size());
|
||||
|
||||
mbstate_t state = {};
|
||||
char converted[MB_LEN_MAX];
|
||||
|
||||
for (auto wc : input) {
|
||||
if (wc == INTERNAL_SEPARATOR) {
|
||||
// do nothing
|
||||
} else if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
|
||||
result.push_back(wc - ENCODE_DIRECT_BASE);
|
||||
} else if (MB_CUR_MAX == 1) { // single-byte locale (C/POSIX/ISO-8859)
|
||||
// If `wc` contains a wide character we emit a question-mark.
|
||||
if (wc & ~0xFF) {
|
||||
wc = '?';
|
||||
}
|
||||
converted[0] = wc;
|
||||
result.append(converted, 1);
|
||||
} else {
|
||||
std::memset(converted, 0, sizeof converted);
|
||||
size_t len = std::wcrtomb(converted, wc, &state);
|
||||
if (len == static_cast<size_t>(-1)) {
|
||||
FLOGF(char_encoding, L"Wide character U+%4X has no narrow representation", wc);
|
||||
std::memset(&state, 0, sizeof(state));
|
||||
} else {
|
||||
result.append(converted, len);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
wcs2string_callback(input.data(), input.size(), [&](const char *buff, size_t bufflen) {
|
||||
result.append(buff, bufflen);
|
||||
return true;
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <locale>
|
||||
|
||||
#include "common.h"
|
||||
#include "flog.h"
|
||||
|
||||
wcstring_range wcstring_tok(wcstring &str, const wcstring &needle, wcstring_range last) {
|
||||
using size_type = wcstring::size_type;
|
||||
|
@ -196,3 +197,7 @@ wcstring join_strings(const wcstring_list_t &vals, wchar_t sep) {
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Out-of-line helper for wcs2string_callback: logs, under the char_encoding category, that
/// \p wc could not be converted to a narrow representation.
/// \param wc the wide character that failed to convert.
void wcs2string_bad_char(wchar_t wc) {
    // Zero-pad to at least four hex digits so the code point reads as U+00E9 rather than "U+  E9".
    FLOGF(char_encoding, L"Wide character U+%04X has no narrow representation", wc);
}
|
||||
|
|
|
@ -3,10 +3,12 @@
|
|||
#define FISH_WCSTRINGUTIL_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
#include "common.h"
|
||||
#include "expand.h"
|
||||
|
||||
/// Test if a string prefixes another. Returns true if a is a prefix of b.
|
||||
bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value);
|
||||
|
@ -136,6 +138,47 @@ wcstring trim(wcstring input, const wchar_t *any_of);
|
|||
/// Converts a string to lowercase.
|
||||
wcstring wcstolower(wcstring input);
|
||||
|
||||
// Out-of-line helper for wcs2string_callback.
|
||||
void wcs2string_bad_char(wchar_t);
|
||||
|
||||
/// Implementation of wcs2string that accepts a callback.
|
||||
/// This invokes \p func with (const char*, size_t) pairs.
|
||||
/// If \p func returns false, it stops; otherwise it continues.
|
||||
/// \return false if the callback returned false, otherwise true.
|
||||
template <typename Func>
|
||||
bool wcs2string_callback(const wchar_t *input, size_t len, const Func &func) {
|
||||
mbstate_t state = {};
|
||||
char converted[MB_LEN_MAX];
|
||||
|
||||
for (size_t i = 0; i < len; i++) {
|
||||
wchar_t wc = input[i];
|
||||
// TODO: this doesn't seem sound.
|
||||
if (wc == INTERNAL_SEPARATOR) {
|
||||
// do nothing
|
||||
} else if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
|
||||
converted[0] = wc - ENCODE_DIRECT_BASE;
|
||||
if (!func(converted, 1)) return false;
|
||||
} else if (MB_CUR_MAX == 1) { // single-byte locale (C/POSIX/ISO-8859)
|
||||
// If `wc` contains a wide character we emit a question-mark.
|
||||
if (wc & ~0xFF) {
|
||||
wc = '?';
|
||||
}
|
||||
converted[0] = wc;
|
||||
if (!func(converted, 1)) return false;
|
||||
} else {
|
||||
std::memset(converted, 0, sizeof converted);
|
||||
size_t len = std::wcrtomb(converted, wc, &state);
|
||||
if (len == static_cast<size_t>(-1)) {
|
||||
wcs2string_bad_char(wc);
|
||||
std::memset(&state, 0, sizeof(state));
|
||||
} else {
|
||||
if (!func(converted, len)) return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Support for iterating over a newline-separated string.
|
||||
template <typename Collection>
|
||||
class line_iterator_t {
|
||||
|
|
Loading…
Reference in a new issue