mirror of
https://github.com/fish-shell/fish-shell
synced 2025-01-15 22:44:01 +00:00
Introduce wcs2string_callback
This is like wcs2string, but instead of returning a std::string, it invokes a user-supplied function with each converted character. The idea is to allow interleaved conversion and output.
This commit is contained in:
parent
c9b42c6f1f
commit
a0cb23bea5
3 changed files with 52 additions and 29 deletions
|
@ -332,38 +332,13 @@ wcstring str2wcstring(const std::string &in, size_t len) {
|
||||||
return str2wcs_internal(in.data(), len);
|
return str2wcs_internal(in.data(), len);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This function is distinguished from wcs2str_internal in that it allows embedded null bytes.
|
|
||||||
std::string wcs2string(const wcstring &input) {
|
std::string wcs2string(const wcstring &input) {
|
||||||
std::string result;
|
std::string result;
|
||||||
result.reserve(input.size());
|
result.reserve(input.size());
|
||||||
|
wcs2string_callback(input.data(), input.size(), [&](const char *buff, size_t bufflen) {
|
||||||
mbstate_t state = {};
|
result.append(buff, bufflen);
|
||||||
char converted[MB_LEN_MAX];
|
return true;
|
||||||
|
});
|
||||||
for (auto wc : input) {
|
|
||||||
if (wc == INTERNAL_SEPARATOR) {
|
|
||||||
// do nothing
|
|
||||||
} else if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
|
|
||||||
result.push_back(wc - ENCODE_DIRECT_BASE);
|
|
||||||
} else if (MB_CUR_MAX == 1) { // single-byte locale (C/POSIX/ISO-8859)
|
|
||||||
// If `wc` contains a wide character we emit a question-mark.
|
|
||||||
if (wc & ~0xFF) {
|
|
||||||
wc = '?';
|
|
||||||
}
|
|
||||||
converted[0] = wc;
|
|
||||||
result.append(converted, 1);
|
|
||||||
} else {
|
|
||||||
std::memset(converted, 0, sizeof converted);
|
|
||||||
size_t len = std::wcrtomb(converted, wc, &state);
|
|
||||||
if (len == static_cast<size_t>(-1)) {
|
|
||||||
FLOGF(char_encoding, L"Wide character U+%4X has no narrow representation", wc);
|
|
||||||
std::memset(&state, 0, sizeof(state));
|
|
||||||
} else {
|
|
||||||
result.append(converted, len);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <locale>
|
#include <locale>
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include "flog.h"
|
||||||
|
|
||||||
wcstring_range wcstring_tok(wcstring &str, const wcstring &needle, wcstring_range last) {
|
wcstring_range wcstring_tok(wcstring &str, const wcstring &needle, wcstring_range last) {
|
||||||
using size_type = wcstring::size_type;
|
using size_type = wcstring::size_type;
|
||||||
|
@ -196,3 +197,7 @@ wcstring join_strings(const wcstring_list_t &vals, wchar_t sep) {
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Out-of-line helper for wcs2string_callback: reports, via FLOGF under the char_encoding
/// category, that the wide character \p wc has no narrow representation.
void wcs2string_bad_char(wchar_t wc) {
|
||||||
|
FLOGF(char_encoding, L"Wide character U+%4X has no narrow representation", wc);
|
||||||
|
}
|
||||||
|
|
|
@ -3,10 +3,12 @@
|
||||||
#define FISH_WCSTRINGUTIL_H
|
#define FISH_WCSTRINGUTIL_H
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <cstring>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include "expand.h"
|
||||||
|
|
||||||
/// Test if a string prefixes another. Returns true if a is a prefix of b.
|
/// Test if a string prefixes another. Returns true if a is a prefix of b.
|
||||||
bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value);
|
bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value);
|
||||||
|
@ -136,6 +138,47 @@ wcstring trim(wcstring input, const wchar_t *any_of);
|
||||||
/// Converts a string to lowercase.
|
/// Converts a string to lowercase.
|
||||||
wcstring wcstolower(wcstring input);
|
wcstring wcstolower(wcstring input);
|
||||||
|
|
||||||
|
// Out-of-line helper for wcs2string_callback.
|
||||||
|
void wcs2string_bad_char(wchar_t);
|
||||||
|
|
||||||
|
/// Implementation of wcs2string that accepts a callback.
|
||||||
|
/// This invokes \p func with (const char*, size_t) pairs.
|
||||||
|
/// If \p func returns false, it stops; otherwise it continues.
|
||||||
|
/// \return false if the callback returned false, otherwise true.
|
||||||
|
template <typename Func>
|
||||||
|
bool wcs2string_callback(const wchar_t *input, size_t len, const Func &func) {
|
||||||
|
mbstate_t state = {};
|
||||||
|
char converted[MB_LEN_MAX];
|
||||||
|
|
||||||
|
for (size_t i = 0; i < len; i++) {
|
||||||
|
wchar_t wc = input[i];
|
||||||
|
// TODO: this doesn't seem sound.
|
||||||
|
if (wc == INTERNAL_SEPARATOR) {
|
||||||
|
// do nothing
|
||||||
|
} else if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
|
||||||
|
converted[0] = wc - ENCODE_DIRECT_BASE;
|
||||||
|
if (!func(converted, 1)) return false;
|
||||||
|
} else if (MB_CUR_MAX == 1) { // single-byte locale (C/POSIX/ISO-8859)
|
||||||
|
// If `wc` contains a wide character we emit a question-mark.
|
||||||
|
if (wc & ~0xFF) {
|
||||||
|
wc = '?';
|
||||||
|
}
|
||||||
|
converted[0] = wc;
|
||||||
|
if (!func(converted, 1)) return false;
|
||||||
|
} else {
|
||||||
|
std::memset(converted, 0, sizeof converted);
|
||||||
|
size_t len = std::wcrtomb(converted, wc, &state);
|
||||||
|
if (len == static_cast<size_t>(-1)) {
|
||||||
|
wcs2string_bad_char(wc);
|
||||||
|
std::memset(&state, 0, sizeof(state));
|
||||||
|
} else {
|
||||||
|
if (!func(converted, len)) return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// Support for iterating over a newline-separated string.
|
/// Support for iterating over a newline-separated string.
|
||||||
template <typename Collection>
|
template <typename Collection>
|
||||||
class line_iterator_t {
|
class line_iterator_t {
|
||||||
|
|
Loading…
Reference in a new issue