From 6705a2efc6af31fb509559be2e253447c6a4e533 Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 15 Jan 2020 13:16:43 -0800 Subject: [PATCH] Migrate a bunch of code out of common.h Put it into wcstringutil, path, or a new file null_terminated_array. --- CMakeLists.txt | 2 +- src/builtin_argparse.cpp | 1 + src/builtin_complete.cpp | 1 + src/builtin_functions.cpp | 1 + src/builtin_printf.cpp | 1 + src/builtin_set.cpp | 1 + src/common.cpp | 218 +--------------------------------- src/common.h | 174 --------------------------- src/complete.cpp | 1 + src/env.h | 1 + src/exec.cpp | 1 + src/fish.cpp | 1 + src/flog.cpp | 1 + src/flog.h | 1 + src/future_feature_flags.cpp | 2 + src/highlight.cpp | 1 + src/null_terminated_array.cpp | 73 ++++++++++++ src/null_terminated_array.h | 87 ++++++++++++++ src/output.cpp | 1 + src/path.cpp | 18 +++ src/path.h | 3 + src/screen.h | 1 + src/wcstringutil.cpp | 129 +++++++++++++++++++- src/wcstringutil.h | 97 +++++++++++++++ src/wildcard.cpp | 2 + 25 files changed, 425 insertions(+), 394 deletions(-) create mode 100644 src/null_terminated_array.cpp create mode 100644 src/null_terminated_array.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 46cf4cad8..ee3834c70 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -117,7 +117,7 @@ SET(FISH_SRCS src/signal.cpp src/tinyexpr.cpp src/tnode.cpp src/tokenizer.cpp src/utf8.cpp src/util.cpp src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp src/wutil.cpp src/future_feature_flags.cpp src/redirection.cpp src/topic_monitor.cpp - src/flog.cpp src/trace.cpp src/timer.cpp + src/flog.cpp src/trace.cpp src/timer.cpp src/null_terminated_array.cpp ) # Header files are just globbed. 
diff --git a/src/builtin_argparse.cpp b/src/builtin_argparse.cpp index 3f3c1c43f..0483e79c3 100644 --- a/src/builtin_argparse.cpp +++ b/src/builtin_argparse.cpp @@ -23,6 +23,7 @@ #include "fallback.h" // IWYU pragma: keep #include "io.h" #include "parser.h" +#include "wcstringutil.h" #include "wgetopt.h" // IWYU pragma: keep #include "wutil.h" // IWYU pragma: keep diff --git a/src/builtin_complete.cpp b/src/builtin_complete.cpp index cbcd7f96a..3e2372826 100644 --- a/src/builtin_complete.cpp +++ b/src/builtin_complete.cpp @@ -19,6 +19,7 @@ #include "parse_util.h" #include "parser.h" #include "reader.h" +#include "wcstringutil.h" #include "wgetopt.h" #include "wutil.h" // IWYU pragma: keep diff --git a/src/builtin_functions.cpp b/src/builtin_functions.cpp index c3f5404c0..c1d936351 100644 --- a/src/builtin_functions.cpp +++ b/src/builtin_functions.cpp @@ -26,6 +26,7 @@ #include "parser_keywords.h" #include "proc.h" #include "signal.h" +#include "wcstringutil.h" #include "wgetopt.h" #include "wutil.h" // IWYU pragma: keep diff --git a/src/builtin_printf.cpp b/src/builtin_printf.cpp index e8a208b12..bda9d974b 100644 --- a/src/builtin_printf.cpp +++ b/src/builtin_printf.cpp @@ -66,6 +66,7 @@ #include "builtin.h" #include "common.h" #include "io.h" +#include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep class parser_t; diff --git a/src/builtin_set.cpp b/src/builtin_set.cpp index 06af7b60b..d6082ef2b 100644 --- a/src/builtin_set.cpp +++ b/src/builtin_set.cpp @@ -25,6 +25,7 @@ #include "io.h" #include "parser.h" #include "proc.h" +#include "wcstringutil.h" #include "wgetopt.h" #include "wutil.h" // IWYU pragma: keep diff --git a/src/common.cpp b/src/common.cpp index 6e43badc4..3fd2da590 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -48,7 +48,6 @@ #include #include -#include #include // IWYU pragma: keep #include @@ -63,6 +62,7 @@ #include "parser.h" #include "proc.h" #include "signal.h" +#include "wcstringutil.h" #include "wildcard.h" #include "wutil.h" 
// IWYU pragma: keep @@ -1870,62 +1870,6 @@ int common_get_width() { return get_current_winsize().ws_col; } int common_get_height() { return get_current_winsize().ws_row; } -bool string_prefixes_string(const wchar_t *proposed_prefix, const wcstring &value) { - return string_prefixes_string(proposed_prefix, value.c_str()); -} - -bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value) { - size_t prefix_size = proposed_prefix.size(); - return prefix_size <= value.size() && value.compare(0, prefix_size, proposed_prefix) == 0; -} - -bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value) { - for (size_t idx = 0; proposed_prefix[idx] != L'\0'; idx++) { - // Note if the prefix is longer than value, then we will compare a nonzero prefix character - // against a zero value character, and so we'll return false; - if (proposed_prefix[idx] != value[idx]) return false; - } - // We must have that proposed_prefix[idx] == L'\0', so we have a prefix match. 
- return true; -} - -bool string_prefixes_string(const char *proposed_prefix, const std::string &value) { - return string_prefixes_string(proposed_prefix, value.c_str()); -} - -bool string_prefixes_string(const char *proposed_prefix, const char *value) { - for (size_t idx = 0; proposed_prefix[idx] != L'\0'; idx++) { - if (proposed_prefix[idx] != value[idx]) return false; - } - return true; -} - -bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix, - const wcstring &value) { - size_t prefix_size = proposed_prefix.size(); - return prefix_size <= value.size() && - wcsncasecmp(proposed_prefix.c_str(), value.c_str(), prefix_size) == 0; -} - -bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value) { - size_t suffix_size = proposed_suffix.size(); - return suffix_size <= value.size() && - value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0; -} - -bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value) { - size_t suffix_size = std::wcslen(proposed_suffix); - return suffix_size <= value.size() && - value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0; -} - -bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, - const wcstring &value) { - size_t suffix_size = proposed_suffix.size(); - return suffix_size <= value.size() && wcsncasecmp(value.c_str() + (value.size() - suffix_size), - proposed_suffix.c_str(), suffix_size) == 0; -} - /// Returns true if seq, represented as a subsequence, is contained within string. static bool subsequence_in_string(const wcstring &seq, const wcstring &str) { // Impossible if seq is larger than string. 
@@ -2021,74 +1965,6 @@ int string_fuzzy_match_t::compare(const string_fuzzy_match_t &rhs) const { return 0; // equal } -template -size_t ifind_impl(const T &haystack, const T &needle) { - using char_t = typename T::value_type; - std::locale locale; - - auto ieq = [&locale](char_t c1, char_t c2) { - if (c1 == c2 || std::toupper(c1, locale) == std::toupper(c2, locale)) return true; - - // In fuzzy matching treat treat `-` and `_` as equal (#3584). - if (Fuzzy) { - if ((c1 == '-' || c1 == '_') && (c2 == '-' || c2 == '_')) return true; - } - return false; - }; - - auto result = std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end(), ieq); - if (result != haystack.end()) { - return result - haystack.begin(); - } - return T::npos; -} - -size_t ifind(const wcstring &haystack, const wcstring &needle, bool fuzzy) { - return fuzzy ? ifind_impl(haystack, needle) : ifind_impl(haystack, needle); -} - -size_t ifind(const std::string &haystack, const std::string &needle, bool fuzzy) { - return fuzzy ? ifind_impl(haystack, needle) : ifind_impl(haystack, needle); -} - -wcstring_list_t split_string(const wcstring &val, wchar_t sep) { - wcstring_list_t out; - size_t pos = 0, end = val.size(); - while (pos <= end) { - size_t next_pos = val.find(sep, pos); - if (next_pos == wcstring::npos) { - next_pos = end; - } - out.emplace_back(val, pos, next_pos - pos); - pos = next_pos + 1; // skip the separator, or skip past the end - } - return out; -} - -wcstring join_strings(const wcstring_list_t &vals, wchar_t sep) { - if (vals.empty()) return wcstring{}; - - // Reserve the size we will need. - // count-1 separators, plus the length of all strings. - size_t size = vals.size() - 1; - for (const wcstring &s : vals) { - size += s.size(); - } - - // Construct the string. 
- wcstring result; - result.reserve(size); - bool first = true; - for (const wcstring &s : vals) { - if (!first) { - result.push_back(sep); - } - result.append(s); - first = false; - } - return result; -} - int create_directory(const wcstring &d) { bool ok = false; struct stat buf; @@ -2212,23 +2088,6 @@ double timef() { void exit_without_destructors(int code) { _exit(code); } -/// Helper function to convert from a null_terminated_array_t to a -/// null_terminated_array_t. -void convert_wide_array_to_narrow(const null_terminated_array_t &wide_arr, - null_terminated_array_t *output) { - const wchar_t *const *arr = wide_arr.get(); - if (!arr) { - output->clear(); - return; - } - - std::vector list; - for (size_t i = 0; arr[i]; i++) { - list.push_back(wcs2string(arr[i])); - } - output->set(list); -} - void autoclose_fd_t::close() { if (fd_ < 0) return; if (::close(fd_) == -1) { @@ -2237,24 +2096,6 @@ void autoclose_fd_t::close() { fd_ = -1; } -void append_path_component(wcstring &path, const wcstring &component) { - if (path.empty() || component.empty()) { - path.append(component); - } else { - size_t path_len = path.size(); - bool path_slash = path.at(path_len - 1) == L'/'; - bool comp_slash = component.at(0) == L'/'; - if (!path_slash && !comp_slash) { - // Need a slash - path.push_back(L'/'); - } else if (path_slash && comp_slash) { - // Too many slashes. - path.erase(path_len - 1, 1); - } - path.append(component); - } -} - extern "C" { [[gnu::noinline]] void debug_thread_error(void) { // Wait for a SIGINT. We can't use sigsuspend() because the signal may be delivered on another @@ -2335,63 +2176,6 @@ void assert_is_locked(void *vmutex, const char *who, const char *caller) { } } -template -static CharType_t **make_null_terminated_array_helper( - const std::vector > &argv) { - size_t count = argv.size(); - - // We allocate everything in one giant block. First compute how much space we need. - // N + 1 pointers. 
- size_t pointers_allocation_len = (count + 1) * sizeof(CharType_t *); - - // In the very unlikely event that CharType_t has stricter alignment requirements than does a - // pointer, round us up to the size of a CharType_t. - pointers_allocation_len += sizeof(CharType_t) - 1; - pointers_allocation_len -= pointers_allocation_len % sizeof(CharType_t); - - // N null terminated strings. - size_t strings_allocation_len = 0; - for (size_t i = 0; i < count; i++) { - // The size of the string, plus a null terminator. - strings_allocation_len += (argv.at(i).size() + 1) * sizeof(CharType_t); - } - - // Now allocate their sum. - unsigned char *base = - static_cast(malloc(pointers_allocation_len + strings_allocation_len)); - if (!base) return nullptr; - - // Divvy it up into the pointers and strings. - CharType_t **pointers = reinterpret_cast(base); - CharType_t *strings = reinterpret_cast(base + pointers_allocation_len); - - // Start copying. - for (size_t i = 0; i < count; i++) { - const std::basic_string &str = argv.at(i); - *pointers++ = strings; // store the current string pointer into self - strings = std::copy(str.begin(), str.end(), strings); // copy the string into strings - *strings++ = (CharType_t)(0); // each string needs a null terminator - } - *pointers++ = nullptr; // array of pointers needs a null terminator - - // Make sure we know what we're doing. 
- assert((unsigned char *)pointers - base == (std::ptrdiff_t)pointers_allocation_len); - assert((unsigned char *)strings - (unsigned char *)pointers == - (std::ptrdiff_t)strings_allocation_len); - assert((unsigned char *)strings - base == - (std::ptrdiff_t)(pointers_allocation_len + strings_allocation_len)); - - return reinterpret_cast(base); -} - -wchar_t **make_null_terminated_array(const wcstring_list_t &lst) { - return make_null_terminated_array_helper(lst); -} - -char **make_null_terminated_array(const std::vector &lst) { - return make_null_terminated_array_helper(lst); -} - /// Test if the specified character is in a range that fish uses interally to store special tokens. /// /// NOTE: This is used when tokenizing the input. It is also used when reading input, before diff --git a/src/common.h b/src/common.h index d3d1939d3..988d5b55b 100644 --- a/src/common.h +++ b/src/common.h @@ -287,69 +287,6 @@ char *wcs2str(const wchar_t *in); char *wcs2str(const wcstring &in); std::string wcs2string(const wcstring &input); -/// Test if a string prefixes another. Returns true if a is a prefix of b. -bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value); -bool string_prefixes_string(const wchar_t *proposed_prefix, const wcstring &value); -bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value); -bool string_prefixes_string(const char *proposed_prefix, const std::string &value); -bool string_prefixes_string(const char *proposed_prefix, const char *value); - -/// Test if a string is a suffix of another. -bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value); -bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value); -bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, - const wcstring &value); - -/// Test if a string prefixes another without regard to case. Returns true if a is a prefix of b. 
-bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix, - const wcstring &value); - -/// Case-insensitive string search, modeled after std::string::find(). -/// \param fuzzy indicates this is being used for fuzzy matching and case insensitivity is -/// expanded to include symbolic characters (#3584). -/// \return the offset of the first case-insensitive matching instance of `needle` within -/// `haystack`, or `string::npos()` if no results were found. -size_t ifind(const wcstring &haystack, const wcstring &needle, bool fuzzy = false); -size_t ifind(const std::string &haystack, const std::string &needle, bool fuzzy = false); - -/// Split a string by a separator character. -wcstring_list_t split_string(const wcstring &val, wchar_t sep); - -/// Join a list of strings by a separator character. -wcstring join_strings(const wcstring_list_t &vals, wchar_t sep); - -/// Support for iterating over a newline-separated string. -template -class line_iterator_t { - // Storage for each line. - Collection storage; - - // The collection we're iterating. Note we hold this by reference. - const Collection &coll; - - // The current location in the iteration. - typename Collection::const_iterator current; - - public: - /// Construct from a collection (presumably std::string or std::wcstring). - line_iterator_t(const Collection &coll) : coll(coll), current(coll.cbegin()) {} - - /// Access the storage in which the last line was stored. - const Collection &line() const { return storage; } - - /// Advances to the next line. \return true on success, false if we have exhausted the string. - bool next() { - if (current == coll.end()) return false; - auto newline_or_end = std::find(current, coll.cend(), '\n'); - storage.assign(current, newline_or_end); - current = newline_or_end; - - // Skip the newline. - if (current != coll.cend()) ++current; - return true; - } -}; - enum fuzzy_match_type_t { // We match the string exactly: FOOBAR matches FOOBAR. 
fuzzy_match_exact = 0, @@ -483,114 +420,6 @@ void format_ullong_safe(wchar_t buff[64], unsigned long long val); /// "Narrows" a wide character string. This just grabs any ASCII characters and trunactes. void narrow_string_safe(char buff[64], const wchar_t *s); -inline wcstring to_string(long x) { - wchar_t buff[64]; - format_long_safe(buff, x); - return wcstring(buff); -} - -inline wcstring to_string(unsigned long long x) { - wchar_t buff[64]; - format_ullong_safe(buff, x); - return wcstring(buff); -} - -inline wcstring to_string(int x) { return to_string(static_cast(x)); } - -inline wcstring to_string(size_t x) { return to_string(static_cast(x)); } - -inline bool bool_from_string(const std::string &x) { - if (x.empty()) return false; - switch (x.front()) { - case 'Y': - case 'T': - case 'y': - case 't': - case '1': - return true; - default: - return false; - } -} - -inline bool bool_from_string(const wcstring &x) { - return !x.empty() && std::wcschr(L"YTyt1", x.at(0)); -} - -wchar_t **make_null_terminated_array(const wcstring_list_t &lst); -char **make_null_terminated_array(const std::vector &lst); - -// Helper class for managing a null-terminated array of null-terminated strings (of some char type). -template -class null_terminated_array_t { - CharType_t **array{nullptr}; - - // No assignment or copying. 
- void operator=(null_terminated_array_t rhs) = delete; - null_terminated_array_t(const null_terminated_array_t &) = delete; - - typedef std::vector> string_list_t; - - size_t size() const { - size_t len = 0; - if (array != nullptr) { - while (array[len] != nullptr) { - len++; - } - } - return len; - } - - void free(void) { - ::free((void *)array); - array = nullptr; - } - - public: - null_terminated_array_t() = default; - - explicit null_terminated_array_t(const string_list_t &argv) - : array(make_null_terminated_array(argv)) {} - - ~null_terminated_array_t() { this->free(); } - - null_terminated_array_t(null_terminated_array_t &&rhs) : array(rhs.array) { - rhs.array = nullptr; - } - - null_terminated_array_t operator=(null_terminated_array_t &&rhs) { - free(); - array = rhs.array; - rhs.array = nullptr; - } - - void set(const string_list_t &argv) { - this->free(); - this->array = make_null_terminated_array(argv); - } - - /// Convert from a null terminated list to a vector of strings. - static string_list_t to_list(const CharType_t *const *arr) { - string_list_t result; - for (const auto *cursor = arr; cursor && *cursor; cursor++) { - result.push_back(*cursor); - } - return result; - } - - /// Instance method. - string_list_t to_list() const { return to_list(array); } - - const CharType_t *const *get() const { return array; } - CharType_t **get() { return array; } - - void clear() { this->free(); } -}; - -// Helper function to convert from a null_terminated_array_t to a -// null_terminated_array_t. -void convert_wide_array_to_narrow(const null_terminated_array_t &arr, - null_terminated_array_t *output); typedef std::lock_guard scoped_lock; typedef std::lock_guard scoped_rlock; @@ -736,9 +565,6 @@ class autoclose_fd_t { ~autoclose_fd_t() { close(); } }; -/// Appends a path component, with a / if necessary. 
-void append_path_component(wcstring &path, const wcstring &component); - wcstring format_string(const wchar_t *format, ...); wcstring vformat_string(const wchar_t *format, va_list va_orig); void append_format(wcstring &str, const wchar_t *format, ...); diff --git a/src/complete.cpp b/src/complete.cpp index d496dc689..038becaba 100644 --- a/src/complete.cpp +++ b/src/complete.cpp @@ -47,6 +47,7 @@ #include "reader.h" #include "tnode.h" #include "util.h" +#include "wcstringutil.h" #include "wildcard.h" #include "wutil.h" // IWYU pragma: keep diff --git a/src/env.h b/src/env.h index b33a5e143..4b0b6421f 100644 --- a/src/env.h +++ b/src/env.h @@ -12,6 +12,7 @@ #include "common.h" #include "maybe.h" +#include "null_terminated_array.h" extern size_t read_byte_limit; extern bool curses_initialized; diff --git a/src/exec.cpp b/src/exec.cpp index 953b8bc2d..0841c7fdd 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -36,6 +36,7 @@ #include "function.h" #include "io.h" #include "iothread.h" +#include "null_terminated_array.h" #include "parse_tree.h" #include "parser.h" #include "path.h" diff --git a/src/fish.cpp b/src/fish.cpp index f9d7e2f38..57573a25d 100644 --- a/src/fish.cpp +++ b/src/fish.cpp @@ -55,6 +55,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "proc.h" #include "reader.h" #include "signal.h" +#include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep // container to hold the options specified within the command line diff --git a/src/flog.cpp b/src/flog.cpp index 477a295dd..5686fb487 100644 --- a/src/flog.cpp +++ b/src/flog.cpp @@ -9,6 +9,7 @@ #include "enum_set.h" #include "global_safety.h" #include "parse_util.h" +#include "wcstringutil.h" #include "wildcard.h" namespace flog_details { diff --git a/src/flog.h b/src/flog.h index 29677935f..9c478fe28 100644 --- a/src/flog.h +++ b/src/flog.h @@ -11,6 +11,7 @@ #include #include "global_safety.h" +#include "wcstringutil.h" using wcstring = std::wstring; using 
wcstring_list_t = std::vector; diff --git a/src/future_feature_flags.cpp b/src/future_feature_flags.cpp index fd90cd5eb..b50ea03e3 100644 --- a/src/future_feature_flags.cpp +++ b/src/future_feature_flags.cpp @@ -4,6 +4,8 @@ #include +#include "wcstringutil.h" + /// The set of features applying to this instance. static features_t global_features; diff --git a/src/highlight.cpp b/src/highlight.cpp index a4a230797..102648ecf 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -32,6 +32,7 @@ #include "path.h" #include "tnode.h" #include "tokenizer.h" +#include "wcstringutil.h" #include "wildcard.h" #include "wutil.h" // IWYU pragma: keep diff --git a/src/null_terminated_array.cpp b/src/null_terminated_array.cpp new file mode 100644 index 000000000..368584d7f --- /dev/null +++ b/src/null_terminated_array.cpp @@ -0,0 +1,73 @@ +#include "null_terminated_array.h" + +template +static CharT **make_null_terminated_array_helper( + const std::vector > &argv) { + size_t count = argv.size(); + + // We allocate everything in one giant block. First compute how much space we need. + // N + 1 pointers. + size_t pointers_allocation_len = (count + 1) * sizeof(CharT *); + + // In the very unlikely event that CharT has stricter alignment requirements than does a + // pointer, round us up to the size of a CharT. + pointers_allocation_len += sizeof(CharT) - 1; + pointers_allocation_len -= pointers_allocation_len % sizeof(CharT); + + // N null terminated strings. + size_t strings_allocation_len = 0; + for (size_t i = 0; i < count; i++) { + // The size of the string, plus a null terminator. + strings_allocation_len += (argv.at(i).size() + 1) * sizeof(CharT); + } + + // Now allocate their sum. + unsigned char *base = + static_cast(malloc(pointers_allocation_len + strings_allocation_len)); + if (!base) return nullptr; + + // Divvy it up into the pointers and strings. 
+ CharT **pointers = reinterpret_cast(base); + CharT *strings = reinterpret_cast(base + pointers_allocation_len); + + // Start copying. + for (size_t i = 0; i < count; i++) { + const std::basic_string &str = argv.at(i); + *pointers++ = strings; // store the current string pointer into self + strings = std::copy(str.begin(), str.end(), strings); // copy the string into strings + *strings++ = (CharT)(0); // each string needs a null terminator + } + *pointers++ = nullptr; // array of pointers needs a null terminator + + // Make sure we know what we're doing. + assert((unsigned char *)pointers - base == (std::ptrdiff_t)pointers_allocation_len); + assert((unsigned char *)strings - (unsigned char *)pointers == + (std::ptrdiff_t)strings_allocation_len); + assert((unsigned char *)strings - base == + (std::ptrdiff_t)(pointers_allocation_len + strings_allocation_len)); + + return reinterpret_cast(base); +} + +wchar_t **make_null_terminated_array(const wcstring_list_t &lst) { + return make_null_terminated_array_helper(lst); +} + +char **make_null_terminated_array(const std::vector &lst) { + return make_null_terminated_array_helper(lst); +} + +void convert_wide_array_to_narrow(const null_terminated_array_t &wide_arr, + null_terminated_array_t *output) { + const wchar_t *const *arr = wide_arr.get(); + if (!arr) { + output->clear(); + return; + } + + std::vector list; + for (size_t i = 0; arr[i]; i++) { + list.push_back(wcs2string(arr[i])); + } + output->set(list); +} diff --git a/src/null_terminated_array.h b/src/null_terminated_array.h new file mode 100644 index 000000000..59950be75 --- /dev/null +++ b/src/null_terminated_array.h @@ -0,0 +1,87 @@ +// Support for null-terminated arrays like char**. 
+#ifndef FISH_NULL_TERMINATED_ARRAY_H +#define FISH_NULL_TERMINATED_ARRAY_H + +#include "config.h" // IWYU pragma: keep + +#include +#include + +#include "common.h" + +wchar_t **make_null_terminated_array(const wcstring_list_t &lst); +char **make_null_terminated_array(const std::vector &lst); + +// Helper class for managing a null-terminated array of null-terminated strings (of some char type). +template +class null_terminated_array_t { + using string_list_t = std::vector>; + + CharT **array{nullptr}; + + // No assignment or copying. + void operator=(null_terminated_array_t rhs) = delete; + null_terminated_array_t(const null_terminated_array_t &) = delete; + + size_t size() const { + size_t len = 0; + if (array != nullptr) { + while (array[len] != nullptr) { + len++; + } + } + return len; + } + + void free(void) { + ::free((void *)array); + array = nullptr; + } + + public: + null_terminated_array_t() = default; + + explicit null_terminated_array_t(const string_list_t &argv) + : array(make_null_terminated_array(argv)) {} + + ~null_terminated_array_t() { this->free(); } + + null_terminated_array_t(null_terminated_array_t &&rhs) : array(rhs.array) { + rhs.array = nullptr; + } + + null_terminated_array_t operator=(null_terminated_array_t &&rhs) { + free(); + array = rhs.array; + rhs.array = nullptr; + } + + void set(const string_list_t &argv) { + this->free(); + this->array = make_null_terminated_array(argv); + } + + /// Convert from a null terminated list to a vector of strings. + static string_list_t to_list(const CharT *const *arr) { + string_list_t result; + for (const auto *cursor = arr; cursor && *cursor; cursor++) { + result.push_back(*cursor); + } + return result; + } + + /// Instance method. 
+ string_list_t to_list() const { return to_list(array); } + + const CharT *const *get() const { return array; } + CharT **get() { return array; } + + void clear() { this->free(); } +}; + +// Helper function to convert from a null_terminated_array_t to a +// null_terminated_array_t. +void convert_wide_array_to_narrow(const null_terminated_array_t &arr, + null_terminated_array_t *output); + +#endif // FISH_NULL_TERMINATED_ARRAY_H diff --git a/src/output.cpp b/src/output.cpp index 69ce26297..501d3ecd3 100644 --- a/src/output.cpp +++ b/src/output.cpp @@ -30,6 +30,7 @@ #include "fallback.h" // IWYU pragma: keep #include "flog.h" #include "output.h" +#include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep /// Whether term256 and term24bit are supported. diff --git a/src/path.cpp b/src/path.cpp index 976103df2..3e2995103 100644 --- a/src/path.cpp +++ b/src/path.cpp @@ -442,3 +442,21 @@ bool paths_are_same_file(const wcstring &path1, const wcstring &path2) { return false; } + +void append_path_component(wcstring &path, const wcstring &component) { + if (path.empty() || component.empty()) { + path.append(component); + } else { + size_t path_len = path.size(); + bool path_slash = path.at(path_len - 1) == L'/'; + bool comp_slash = component.at(0) == L'/'; + if (!path_slash && !comp_slash) { + // Need a slash + path.push_back(L'/'); + } else if (path_slash && comp_slash) { + // Too many slashes. + path.erase(path_len - 1, 1); + } + path.append(component); + } +} diff --git a/src/path.h b/src/path.h index 1bf64cb4b..475be6100 100644 --- a/src/path.h +++ b/src/path.h @@ -86,4 +86,7 @@ bool paths_are_same_file(const wcstring &path1, const wcstring &path2); /// directory. This operates on unescaped paths only (so a ~ means a literal ~). wcstring path_apply_working_directory(const wcstring &path, const wcstring &working_directory); +/// Appends a path component, with a / if necessary. 
+void append_path_component(wcstring &path, const wcstring &component); + #endif diff --git a/src/screen.h b/src/screen.h index d4f3e277c..0394845ef 100644 --- a/src/screen.h +++ b/src/screen.h @@ -24,6 +24,7 @@ #include "common.h" #include "highlight.h" +#include "wcstringutil.h" class page_rendering_t; diff --git a/src/wcstringutil.cpp b/src/wcstringutil.cpp index f5a5658ae..9a75132c2 100644 --- a/src/wcstringutil.cpp +++ b/src/wcstringutil.cpp @@ -5,11 +5,12 @@ #include +#include + #include "common.h" -using size_type = wcstring::size_type; - wcstring_range wcstring_tok(wcstring &str, const wcstring &needle, wcstring_range last) { + using size_type = wcstring::size_type; size_type pos = last.second == wcstring::npos ? wcstring::npos : last.first; if (pos != wcstring::npos && last.second != wcstring::npos) pos += last.second; if (pos != wcstring::npos && pos != 0) ++pos; @@ -71,3 +72,127 @@ wcstring wcstolower(wcstring input) { std::transform(result.begin(), result.end(), result.begin(), towlower); return result; } + +bool string_prefixes_string(const wchar_t *proposed_prefix, const wcstring &value) { + return string_prefixes_string(proposed_prefix, value.c_str()); +} + +bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value) { + size_t prefix_size = proposed_prefix.size(); + return prefix_size <= value.size() && value.compare(0, prefix_size, proposed_prefix) == 0; +} + +bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value) { + for (size_t idx = 0; proposed_prefix[idx] != L'\0'; idx++) { + // Note if the prefix is longer than value, then we will compare a nonzero prefix character + // against a zero value character, and so we'll return false; + if (proposed_prefix[idx] != value[idx]) return false; + } + // We must have that proposed_prefix[idx] == L'\0', so we have a prefix match. 
+ return true; +} + +bool string_prefixes_string(const char *proposed_prefix, const std::string &value) { + return string_prefixes_string(proposed_prefix, value.c_str()); +} + +bool string_prefixes_string(const char *proposed_prefix, const char *value) { + for (size_t idx = 0; proposed_prefix[idx] != L'\0'; idx++) { + if (proposed_prefix[idx] != value[idx]) return false; + } + return true; +} + +bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix, + const wcstring &value) { + size_t prefix_size = proposed_prefix.size(); + return prefix_size <= value.size() && + wcsncasecmp(proposed_prefix.c_str(), value.c_str(), prefix_size) == 0; +} + +bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value) { + size_t suffix_size = proposed_suffix.size(); + return suffix_size <= value.size() && + value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0; +} + +bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value) { + size_t suffix_size = std::wcslen(proposed_suffix); + return suffix_size <= value.size() && + value.compare(value.size() - suffix_size, suffix_size, proposed_suffix) == 0; +} + +bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, + const wcstring &value) { + size_t suffix_size = proposed_suffix.size(); + return suffix_size <= value.size() && wcsncasecmp(value.c_str() + (value.size() - suffix_size), + proposed_suffix.c_str(), suffix_size) == 0; +} + +template +size_t ifind_impl(const T &haystack, const T &needle) { + using char_t = typename T::value_type; + std::locale locale; + + auto ieq = [&locale](char_t c1, char_t c2) { + if (c1 == c2 || std::toupper(c1, locale) == std::toupper(c2, locale)) return true; + + // In fuzzy matching treat treat `-` and `_` as equal (#3584). 
+ if (Fuzzy) { + if ((c1 == '-' || c1 == '_') && (c2 == '-' || c2 == '_')) return true; + } + return false; + }; + + auto result = std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end(), ieq); + if (result != haystack.end()) { + return result - haystack.begin(); + } + return T::npos; +} + +size_t ifind(const wcstring &haystack, const wcstring &needle, bool fuzzy) { + return fuzzy ? ifind_impl(haystack, needle) : ifind_impl(haystack, needle); +} + +size_t ifind(const std::string &haystack, const std::string &needle, bool fuzzy) { + return fuzzy ? ifind_impl(haystack, needle) : ifind_impl(haystack, needle); +} + +wcstring_list_t split_string(const wcstring &val, wchar_t sep) { + wcstring_list_t out; + size_t pos = 0, end = val.size(); + while (pos <= end) { + size_t next_pos = val.find(sep, pos); + if (next_pos == wcstring::npos) { + next_pos = end; + } + out.emplace_back(val, pos, next_pos - pos); + pos = next_pos + 1; // skip the separator, or skip past the end + } + return out; +} + +wcstring join_strings(const wcstring_list_t &vals, wchar_t sep) { + if (vals.empty()) return wcstring{}; + + // Reserve the size we will need. + // count-1 separators, plus the length of all strings. + size_t size = vals.size() - 1; + for (const wcstring &s : vals) { + size += s.size(); + } + + // Construct the string. + wcstring result; + result.reserve(size); + bool first = true; + for (const wcstring &s : vals) { + if (!first) { + result.push_back(sep); + } + result.append(s); + first = false; + } + return result; +} diff --git a/src/wcstringutil.h b/src/wcstringutil.h index 54ce9ba2a..e18e6cee5 100644 --- a/src/wcstringutil.h +++ b/src/wcstringutil.h @@ -8,6 +8,71 @@ #include "common.h" +/// Test if a string prefixes another. Returns true if a is a prefix of b. 
+bool string_prefixes_string(const wcstring &proposed_prefix, const wcstring &value); +bool string_prefixes_string(const wchar_t *proposed_prefix, const wcstring &value); +bool string_prefixes_string(const wchar_t *proposed_prefix, const wchar_t *value); +bool string_prefixes_string(const char *proposed_prefix, const std::string &value); +bool string_prefixes_string(const char *proposed_prefix, const char *value); + +/// Test if a string is a suffix of another. Returns true if proposed_suffix is a suffix of value. +bool string_suffixes_string(const wcstring &proposed_suffix, const wcstring &value); +bool string_suffixes_string(const wchar_t *proposed_suffix, const wcstring &value); +bool string_suffixes_string_case_insensitive(const wcstring &proposed_suffix, + const wcstring &value); + +/// Test if a string prefixes another without regard to case. Returns true if proposed_prefix is a prefix of value. +bool string_prefixes_string_case_insensitive(const wcstring &proposed_prefix, + const wcstring &value); + +/// Case-insensitive string search, modeled after std::string::find(). +/// \param fuzzy indicates this is being used for fuzzy matching and case insensitivity is +/// expanded to also treat `-` and `_` as equal (#3584). +/// \return the offset of the first case-insensitive matching instance of `needle` within +/// `haystack`, or the string type's `npos` if no results were found. +size_t ifind(const wcstring &haystack, const wcstring &needle, bool fuzzy = false); +size_t ifind(const std::string &haystack, const std::string &needle, bool fuzzy = false); + +/// Split a string by a separator character. +wcstring_list_t split_string(const wcstring &val, wchar_t sep); + +/// Join a list of strings by a separator character. 
+wcstring join_strings(const wcstring_list_t &vals, wchar_t sep); + +inline wcstring to_string(long x) { + wchar_t buff[64]; + format_long_safe(buff, x); + return wcstring(buff); +} + +inline wcstring to_string(unsigned long long x) { + wchar_t buff[64]; + format_ullong_safe(buff, x); + return wcstring(buff); +} + +inline wcstring to_string(int x) { return to_string(static_cast(x)); } + +inline wcstring to_string(size_t x) { return to_string(static_cast(x)); } + +inline bool bool_from_string(const std::string &x) { + if (x.empty()) return false; + switch (x.front()) { + case 'Y': + case 'T': + case 'y': + case 't': + case '1': + return true; + default: + return false; + } +} + +inline bool bool_from_string(const wcstring &x) { + return !x.empty() && std::wcschr(L"YTyt1", x.at(0)); +} + /// @typedef wcstring_range represents a range in a wcstring. /// The first element is the location, the second is the count. typedef std::pair wcstring_range; @@ -71,4 +136,36 @@ wcstring trim(wcstring input, const wchar_t *any_of); /// Converts a string to lowercase. wcstring wcstolower(wcstring input); +/// Support for iterating over a newline-separated string. +template +class line_iterator_t { + // Storage for each line. + Collection storage; + + // The collection we're iterating. Note we hold this by reference, so it must outlive this iterator. + const Collection &coll; + + // The current location in the iteration. + typename Collection::const_iterator current; + + public: + /// Construct from a collection (presumably std::string or wcstring). + line_iterator_t(const Collection &coll) : coll(coll), current(coll.cbegin()) {} + + /// Access the storage in which the last line was stored. + const Collection &line() const { return storage; } + + /// Advances to the next line, storing it so that line() returns it. \return true on success, false if we have exhausted the string. 
+ bool next() { + if (current == coll.end()) return false; + auto newline_or_end = std::find(current, coll.cend(), '\n'); + storage.assign(current, newline_or_end); + current = newline_or_end; + + // Skip the newline, if we stopped at one rather than at the end. + if (current != coll.cend()) ++current; + return true; + } +}; + #endif diff --git a/src/wildcard.cpp b/src/wildcard.cpp index 692342bc6..d3c7ad12b 100644 --- a/src/wildcard.cpp +++ b/src/wildcard.cpp @@ -22,7 +22,9 @@ #include "expand.h" #include "fallback.h" // IWYU pragma: keep #include "future_feature_flags.h" +#include "path.h" #include "reader.h" +#include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep /// Description for generic executable.