2016-04-27 23:10:14 +00:00
|
|
|
// Various functions, mostly string utilities, that are used by most parts of fish.
|
2005-10-05 09:58:00 +00:00
|
|
|
#include "config.h"
|
|
|
|
|
2019-12-14 05:50:06 +00:00
|
|
|
#ifdef HAVE_BACKTRACE_SYMBOLS
|
2016-05-16 02:45:02 +00:00
|
|
|
#include <cxxabi.h>
|
2019-12-14 05:50:06 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#include <ctype.h>
|
2016-05-16 02:45:02 +00:00
|
|
|
#include <dlfcn.h>
|
2005-09-20 13:26:39 +00:00
|
|
|
#include <errno.h>
|
2016-12-15 03:21:36 +00:00
|
|
|
#include <fcntl.h>
|
2005-09-20 13:26:39 +00:00
|
|
|
#include <limits.h>
|
2017-09-21 05:00:14 +00:00
|
|
|
#include <pthread.h>
|
2012-11-19 00:30:30 +00:00
|
|
|
#include <stdarg.h>
|
2005-09-28 01:43:09 +00:00
|
|
|
#include <sys/time.h>
|
2016-04-27 23:10:14 +00:00
|
|
|
#include <termios.h>
|
|
|
|
#include <unistd.h>
|
2019-10-13 22:50:48 +00:00
|
|
|
|
2007-01-20 02:36:49 +00:00
|
|
|
#ifdef HAVE_EXECINFO_H
|
|
|
|
#include <execinfo.h>
|
|
|
|
#endif
|
2017-02-14 04:37:27 +00:00
|
|
|
|
2019-08-25 23:29:46 +00:00
|
|
|
#ifdef __linux__
|
|
|
|
// Includes for WSL detection
|
|
|
|
#include <sys/utsname.h>
|
|
|
|
#endif
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
#include <algorithm>
|
2022-08-21 06:14:48 +00:00
|
|
|
#include <csignal>
|
|
|
|
#include <cstdlib>
|
2022-08-21 21:51:33 +00:00
|
|
|
#include <cstring>
|
2022-08-21 06:14:48 +00:00
|
|
|
#include <cwchar>
|
|
|
|
#include <memory>
|
2007-01-20 02:36:49 +00:00
|
|
|
|
2005-09-20 13:26:39 +00:00
|
|
|
#include "common.h"
|
|
|
|
#include "expand.h"
|
2016-04-27 23:10:14 +00:00
|
|
|
#include "fallback.h" // IWYU pragma: keep
|
2019-05-27 22:56:53 +00:00
|
|
|
#include "flog.h"
|
2018-04-24 22:53:30 +00:00
|
|
|
#include "future_feature_flags.h"
|
2019-04-28 22:00:36 +00:00
|
|
|
#include "global_safety.h"
|
2019-05-29 19:33:44 +00:00
|
|
|
#include "iothread.h"
|
2019-05-26 02:19:03 +00:00
|
|
|
#include "signal.h"
|
2020-06-08 01:47:27 +00:00
|
|
|
#include "termsize.h"
|
2022-08-21 21:51:33 +00:00
|
|
|
#include "topic_monitor.h"
|
2020-01-15 21:16:43 +00:00
|
|
|
#include "wcstringutil.h"
|
2005-09-20 13:26:39 +00:00
|
|
|
#include "wildcard.h"
|
2016-04-27 23:10:14 +00:00
|
|
|
#include "wutil.h" // IWYU pragma: keep
|
2005-09-20 13:26:39 +00:00
|
|
|
|
2020-11-20 21:06:19 +00:00
|
|
|
// Keep after "common.h"
|
2021-10-02 23:47:17 +00:00
|
|
|
#ifdef HAVE_SYS_SYSCTL_H
|
2022-08-21 21:51:33 +00:00
|
|
|
#include <sys/sysctl.h> // IWYU pragma: keep
|
2021-10-02 23:47:17 +00:00
|
|
|
#endif
|
|
|
|
#if defined(__APPLE__)
|
2022-08-21 21:51:33 +00:00
|
|
|
#include <mach-o/dyld.h> // IWYU pragma: keep
|
2020-11-20 21:06:19 +00:00
|
|
|
#endif
|
|
|
|
|
2012-11-19 00:30:30 +00:00
|
|
|
struct termios shell_modes;
|
2005-09-20 13:26:39 +00:00
|
|
|
|
2021-02-04 23:18:34 +00:00
|
|
|
const wcstring g_empty_string{};
|
|
|
|
|
2018-12-31 01:39:59 +00:00
|
|
|
/// This allows us to notice when we've forked.
|
2019-04-28 22:00:36 +00:00
|
|
|
static relaxed_atomic_bool_t is_forked_proc{false};
|
2018-12-31 01:39:59 +00:00
|
|
|
/// This allows us to bypass the main thread checks
|
2019-04-28 22:00:36 +00:00
|
|
|
static relaxed_atomic_bool_t thread_asserts_cfg_for_testing{false};
|
|
|
|
|
|
|
|
static relaxed_atomic_t<wchar_t> ellipsis_char;
|
|
|
|
wchar_t get_ellipsis_char() { return ellipsis_char; }
|
|
|
|
|
|
|
|
static relaxed_atomic_t<const wchar_t *> ellipsis_str;
|
|
|
|
const wchar_t *get_ellipsis_str() { return ellipsis_str; }
|
|
|
|
|
|
|
|
static relaxed_atomic_t<const wchar_t *> omitted_newline_str;
|
|
|
|
const wchar_t *get_omitted_newline_str() { return omitted_newline_str; }
|
|
|
|
|
|
|
|
static relaxed_atomic_t<int> omitted_newline_width;
|
|
|
|
int get_omitted_newline_width() { return omitted_newline_width; }
|
|
|
|
|
|
|
|
static relaxed_atomic_t<wchar_t> obfuscation_read_char;
|
|
|
|
wchar_t get_obfuscation_read_char() { return obfuscation_read_char; }
|
2012-01-05 21:58:48 +00:00
|
|
|
|
2014-02-09 22:04:43 +00:00
|
|
|
bool g_profiling_active = false;
|
2021-07-05 21:29:24 +00:00
|
|
|
|
2018-11-28 14:08:24 +00:00
|
|
|
const wchar_t *program_name;
|
2019-04-28 22:00:36 +00:00
|
|
|
|
2016-05-16 02:45:02 +00:00
|
|
|
/// Be able to restore the term's foreground process group.
|
2018-09-29 04:20:50 +00:00
|
|
|
/// This is set during startup and not modified after.
|
2019-04-28 22:56:49 +00:00
|
|
|
static relaxed_atomic_t<pid_t> initial_fg_process_group{-1};
|
2005-09-24 19:31:17 +00:00
|
|
|
|
2018-09-29 04:20:50 +00:00
|
|
|
#if !defined(OS_IS_CYGWIN) && !defined(WSL)
const bool has_working_tty_timestamps = true;
#else
// MS Windows tty devices do not currently have either a read or write timestamp. Those
// respective fields of `struct stat` are always the current time, which means we can't
// use them. So we assume no external program has written to the terminal behind our
// back. This makes multiline prompts usable. See issue #2859 and
// https://github.com/Microsoft/BashOnWindows/issues/545
const bool has_working_tty_timestamps = false;
#endif
|
2016-06-17 20:08:25 +00:00
|
|
|
|
2017-06-23 03:47:54 +00:00
|
|
|
/// Map a digit character to its numeric value in the given base.
///
/// '0'-'9' map to 0-9; 'a'-'z' and 'A'-'Z' both map to 10-35.
///
/// \param d the character to convert.
/// \param base the radix; a value is only accepted if it is strictly less than this.
/// \return the digit value, or -1 if \p d is not a digit of that base.
long convert_digit(wchar_t d, int base) {
    long value;
    if (d >= L'0' && d <= L'9') {
        value = d - L'0';
    } else if (d >= L'a' && d <= L'z') {
        value = 10 + (d - L'a');
    } else if (d >= L'A' && d <= L'Z') {
        value = 10 + (d - L'A');
    } else {
        return -1;
    }
    // A recognized character can still be out of range for a small base.
    return value < base ? value : -1;
}
|
|
|
|
|
|
|
|
/// Test whether the char is a valid hex digit as used by the `escape_string_*()` functions.
///
/// Only '0'-'9' and uppercase 'A'-'F' are accepted. The comparison is done on the full int
/// value: a strchr()-based lookup would also match the string's terminating NUL (so 0 would
/// count as a digit) and would truncate \p c to a char before comparing.
///
/// \param c the character code to test.
/// \return true if \p c is one of "0123456789ABCDEF".
static bool is_hex_digit(int c) {
    return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F');
}
|
2017-06-23 03:47:54 +00:00
|
|
|
|
|
|
|
/// This is a specialization of `convert_digit()` that only handles base 16 and only uppercase.
///
/// The accepted set matches is_hex_digit(): '0'-'9' and 'A'-'F'. Letters past 'F' are rejected
/// rather than being mapped to values of 16 and above.
///
/// \param d the character to convert.
/// \return the value 0-15, or -1 if \p d is not an uppercase hex digit.
static long convert_hex_digit(wchar_t d) {
    if (d >= L'0' && d <= L'9') {
        return d - L'0';
    }
    if (d >= L'A' && d <= L'F') {
        return 10 + (d - L'A');
    }
    return -1;
}
|
|
|
|
|
2019-02-14 23:53:07 +00:00
|
|
|
bool is_windows_subsystem_for_linux() {
|
|
|
|
#if defined(WSL)
|
|
|
|
return true;
|
|
|
|
#elif not defined(__linux__)
|
|
|
|
return false;
|
2019-02-15 00:30:10 +00:00
|
|
|
#else
|
2019-08-25 23:29:46 +00:00
|
|
|
// We are purposely not using std::call_once as it may invoke locking, which is an unnecessary
|
|
|
|
// overhead since there's no actual race condition here - even if multiple threads call this
|
|
|
|
// routine simultaneously the first time around, we just end up needlessly querying uname(2) one
|
|
|
|
// more time.
|
|
|
|
|
2020-10-10 10:50:07 +00:00
|
|
|
static bool wsl_state = [] {
|
2019-08-25 23:29:46 +00:00
|
|
|
utsname info;
|
2019-02-14 23:53:07 +00:00
|
|
|
uname(&info);
|
2019-08-25 23:29:46 +00:00
|
|
|
|
2019-08-25 23:50:17 +00:00
|
|
|
// Sample utsname.release under WSL, testing for something like `4.4.0-17763-Microsoft`
|
2019-08-25 23:29:46 +00:00
|
|
|
if (std::strstr(info.release, "Microsoft") != nullptr) {
|
|
|
|
const char *dash = std::strchr(info.release, '-');
|
|
|
|
if (dash == nullptr || strtod(dash + 1, nullptr) < 17763) {
|
2019-08-25 23:50:17 +00:00
|
|
|
// #5298, #5661: There are acknowledged, published, and (later) fixed issues with
|
|
|
|
// job control under early WSL releases that prevent fish from running correctly,
|
|
|
|
// with unexpected failures when piping. Fish 3.0 nightly builds worked around this
|
|
|
|
// issue with some needlessly complicated code that was later stripped from the
|
|
|
|
// fish 3.0 release, so we just bail. Note that fish 2.0 was also broken, but we
|
|
|
|
// just didn't warn about it.
|
|
|
|
|
|
|
|
// #6038 & 5101bde: It's been requested that there be some sort of way to disable
|
|
|
|
// this check: if the environment variable FISH_NO_WSL_CHECK is present, this test
|
|
|
|
// is bypassed. We intentionally do not include this in the error message because
|
|
|
|
// it'll only allow fish to run but not to actually work. Here be dragons!
|
|
|
|
if (getenv("FISH_NO_WSL_CHECK") == nullptr) {
|
2020-01-19 12:31:25 +00:00
|
|
|
FLOGF(error,
|
2019-08-25 23:50:17 +00:00
|
|
|
"This version of WSL has known bugs that prevent fish from working."
|
|
|
|
"Please upgrade to Windows 10 1809 (17763) or higher to use fish!");
|
|
|
|
}
|
2019-08-25 23:29:46 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
2019-02-14 23:53:07 +00:00
|
|
|
}();
|
|
|
|
|
|
|
|
// Subsequent calls to this function may take place after fork() and before exec() in
|
|
|
|
// postfork.cpp. Make sure we never dynamically allocate any memory in the fast path!
|
|
|
|
return wsl_state;
|
2019-02-15 00:30:10 +00:00
|
|
|
#endif
|
2019-02-14 23:53:07 +00:00
|
|
|
}
|
|
|
|
|
2016-05-20 02:27:22 +00:00
|
|
|
#ifdef HAVE_BACKTRACE_SYMBOLS
|
2016-05-16 02:45:02 +00:00
|
|
|
// This function produces a stack backtrace with demangled function & method names. It is based on
|
|
|
|
// https://gist.github.com/fmela/591333 but adapted to the style of the fish project.
|
2019-12-21 20:45:44 +00:00
|
|
|
[[gnu::noinline]] static wcstring_list_t demangled_backtrace(int max_frames, int skip_levels) {
|
2016-05-16 02:45:02 +00:00
|
|
|
void *callstack[128];
|
|
|
|
const int n_max_frames = sizeof(callstack) / sizeof(callstack[0]);
|
|
|
|
int n_frames = backtrace(callstack, n_max_frames);
|
|
|
|
char **symbols = backtrace_symbols(callstack, n_frames);
|
|
|
|
wchar_t text[1024];
|
2019-03-14 18:15:50 +00:00
|
|
|
wcstring_list_t backtrace_text;
|
2016-05-16 02:45:02 +00:00
|
|
|
|
|
|
|
if (skip_levels + max_frames < n_frames) n_frames = skip_levels + max_frames;
|
|
|
|
|
|
|
|
for (int i = skip_levels; i < n_frames; i++) {
|
|
|
|
Dl_info info;
|
|
|
|
if (dladdr(callstack[i], &info) && info.dli_sname) {
|
2019-11-19 02:34:50 +00:00
|
|
|
char *demangled = nullptr;
|
2016-05-16 02:45:02 +00:00
|
|
|
int status = -1;
|
|
|
|
if (info.dli_sname[0] == '_')
|
2019-11-19 02:34:50 +00:00
|
|
|
demangled = abi::__cxa_demangle(info.dli_sname, nullptr, nullptr, &status);
|
2020-11-22 13:39:48 +00:00
|
|
|
swprintf(text, sizeof(text) / sizeof(wchar_t), L"%-3d %s + %td", i - skip_levels,
|
|
|
|
status == 0 ? demangled
|
|
|
|
: info.dli_sname == nullptr ? symbols[i]
|
|
|
|
: info.dli_sname,
|
2021-03-05 17:37:49 +00:00
|
|
|
static_cast<char *>(callstack[i]) - static_cast<const char *>(info.dli_saddr));
|
2016-05-16 02:45:02 +00:00
|
|
|
free(demangled);
|
|
|
|
} else {
|
|
|
|
swprintf(text, sizeof(text) / sizeof(wchar_t), L"%-3d %s", i - skip_levels, symbols[i]);
|
|
|
|
}
|
|
|
|
backtrace_text.push_back(text);
|
|
|
|
}
|
|
|
|
free(symbols);
|
|
|
|
return backtrace_text;
|
|
|
|
}
|
|
|
|
|
2022-06-16 08:26:43 +00:00
|
|
|
[[gnu::noinline]] void show_stackframe(int frame_count, int skip_levels) {
|
2016-10-21 01:53:31 +00:00
|
|
|
if (frame_count < 1) return;
|
2013-01-12 20:55:23 +00:00
|
|
|
|
2019-03-14 17:52:26 +00:00
|
|
|
wcstring_list_t bt = demangled_backtrace(frame_count, skip_levels + 2);
|
2022-06-16 08:26:43 +00:00
|
|
|
FLOG(error, L"Backtrace:\n" + join_strings(bt, L'\n') + L'\n');
|
2007-01-20 02:36:49 +00:00
|
|
|
}
|
|
|
|
|
2021-03-21 15:05:45 +00:00
|
|
|
#else // HAVE_BACKTRACE_SYMBOLS
|
2016-05-20 02:27:22 +00:00
|
|
|
|
2022-06-16 08:26:43 +00:00
|
|
|
/// Variant used when HAVE_BACKTRACE_SYMBOLS is not defined: both arguments are ignored and a
/// fixed message is logged at the error level instead of a backtrace.
[[gnu::noinline]] void show_stackframe(int /*frame_count*/, int /*skip_levels*/) {
    FLOGF(error, L"Sorry, but your system does not support backtraces");
}
|
2016-05-23 02:00:13 +00:00
|
|
|
#endif // HAVE_BACKTRACE_SYMBOLS
|
2016-05-20 02:27:22 +00:00
|
|
|
|
2020-09-19 23:53:12 +00:00
|
|
|
/// \return the smallest pointer in the range [start, start + len] which is aligned to Align.
/// If there is no such pointer, return \p start + len.
/// Align must be a power of 2 and in range [1, 64]; both are enforced at compile time.
/// This is intended to return the end point of the "unaligned prefix" of a vectorized loop.
template <size_t Align>
static inline const char *align_start(const char *start, size_t len) {
    static_assert(Align >= 1 && Align <= 64, "Alignment must be in range [1, 64]");
    static_assert((Align & (Align - 1)) == 0, "Alignment must be power of 2");
    const uintptr_t addr = reinterpret_cast<uintptr_t>(start);
    // Distance up to the next multiple of Align, i.e. (-addr) mod Align. Because Align is a
    // power of two the modulo is a mask, and an already-aligned address yields 0.
    const size_t gap = static_cast<size_t>((0 - addr) & (Align - 1));
    // Never step past the end of the range.
    return start + (gap < len ? gap : len);
}
|
|
|
|
|
2020-09-19 23:53:12 +00:00
|
|
|
/// \return the largest pointer in the range [start, start + len] which is aligned to Align.
/// If there is no such pointer, return \p start.
/// Align must be a power of 2 and in range [1, 64]; both are enforced at compile time.
/// This is intended to be the start point of the "unaligned suffix" of a vectorized loop.
template <size_t Align>
static inline const char *align_end(const char *start, size_t len) {
    static_assert(Align >= 1 && Align <= 64, "Alignment must be in range [1, 64]");
    static_assert((Align & (Align - 1)) == 0, "Alignment must be power of 2");
    const char *const end = start + len;
    // How far the end sticks out past the previous multiple of Align.
    const size_t excess = static_cast<size_t>(reinterpret_cast<uintptr_t>(end) & (Align - 1));
    // Never step back before the start of the range.
    return end - (excess < len ? excess : len);
}
|
|
|
|
|
2020-09-19 23:53:12 +00:00
|
|
|
/// \return the count of initial characters in \p in which are ASCII.
|
|
|
|
static size_t count_ascii_prefix(const char *in, size_t in_len) {
|
|
|
|
// We'll use aligned reads of this type.
|
|
|
|
using WordType = uint32_t;
|
|
|
|
const char *aligned_start = align_start<alignof(WordType)>(in, in_len);
|
|
|
|
const char *aligned_end = align_end<alignof(WordType)>(in, in_len);
|
|
|
|
|
|
|
|
// Consume the unaligned prefix.
|
|
|
|
for (const char *cursor = in; cursor < aligned_start; cursor++) {
|
|
|
|
if (cursor[0] & 0x80) return &cursor[0] - in;
|
2020-09-08 00:31:07 +00:00
|
|
|
}
|
|
|
|
|
2020-09-19 23:53:12 +00:00
|
|
|
// Consume the aligned middle.
|
|
|
|
for (const char *cursor = aligned_start; cursor < aligned_end; cursor += sizeof(WordType)) {
|
|
|
|
if (*reinterpret_cast<const WordType *>(cursor) & 0x80808080) {
|
|
|
|
if (cursor[0] & 0x80) return &cursor[0] - in;
|
|
|
|
if (cursor[1] & 0x80) return &cursor[1] - in;
|
|
|
|
if (cursor[2] & 0x80) return &cursor[2] - in;
|
|
|
|
return &cursor[3] - in;
|
|
|
|
}
|
|
|
|
}
|
2020-09-08 00:31:07 +00:00
|
|
|
|
2020-09-19 23:53:12 +00:00
|
|
|
// Consume the unaligned suffix.
|
|
|
|
for (const char *cursor = aligned_end; cursor < in + in_len; cursor++) {
|
|
|
|
if (cursor[0] & 0x80) return &cursor[0] - in;
|
2020-09-19 22:01:38 +00:00
|
|
|
}
|
|
|
|
return in_len;
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Converts the narrow character string \c in into its wide equivalent, and return it.
|
|
|
|
///
|
|
|
|
/// The string may contain embedded nulls.
|
|
|
|
///
|
|
|
|
/// This function encodes illegal character sequences in a reversible way using the private use
|
|
|
|
/// area.
|
|
|
|
static wcstring str2wcs_internal(const char *in, const size_t in_len) {
|
|
|
|
if (in_len == 0) return wcstring();
|
2019-11-19 02:34:50 +00:00
|
|
|
assert(in != nullptr);
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2012-12-20 20:25:35 +00:00
|
|
|
wcstring result;
|
|
|
|
result.reserve(in_len);
|
2016-03-11 02:17:39 +00:00
|
|
|
|
2020-09-19 22:01:38 +00:00
|
|
|
// In the unlikely event that MB_CUR_MAX is 1, then we are just going to append.
|
|
|
|
if (MB_CUR_MAX == 1) {
|
2020-09-20 13:05:49 +00:00
|
|
|
size_t in_pos = 0;
|
|
|
|
while (in_pos < in_len) {
|
|
|
|
result.push_back(static_cast<unsigned char>(in[in_pos]));
|
|
|
|
in_pos++;
|
|
|
|
}
|
2016-03-11 02:17:39 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2020-09-19 22:01:38 +00:00
|
|
|
size_t in_pos = 0;
|
2016-03-11 02:17:39 +00:00
|
|
|
mbstate_t state = {};
|
2016-04-27 23:10:14 +00:00
|
|
|
while (in_pos < in_len) {
|
2020-09-19 22:01:38 +00:00
|
|
|
// Append any initial sequence of ascii characters.
|
2020-09-19 23:53:12 +00:00
|
|
|
// Note we do not support character sets which are not supersets of ASCII.
|
2020-09-19 22:01:38 +00:00
|
|
|
size_t ascii_prefix_length = count_ascii_prefix(&in[in_pos], in_len - in_pos);
|
|
|
|
result.insert(result.end(), &in[in_pos], &in[in_pos + ascii_prefix_length]);
|
|
|
|
in_pos += ascii_prefix_length;
|
|
|
|
assert(in_pos <= in_len && "Position overflowed length");
|
|
|
|
if (in_pos == in_len) break;
|
|
|
|
|
|
|
|
// We have found a non-ASCII character.
|
2016-05-19 00:46:13 +00:00
|
|
|
bool use_encode_direct = false;
|
2016-07-30 10:08:57 +00:00
|
|
|
size_t ret = 0;
|
2012-12-20 20:25:35 +00:00
|
|
|
wchar_t wc = 0;
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2020-09-07 23:03:22 +00:00
|
|
|
if (false) {
|
|
|
|
#if defined(HAVE_BROKEN_MBRTOWC_UTF8)
|
|
|
|
} else if ((in[in_pos] & 0xF8) == 0xF8) {
|
2019-03-12 21:06:01 +00:00
|
|
|
// Protect against broken std::mbrtowc() implementations which attempt to encode UTF-8
|
2016-05-19 00:46:13 +00:00
|
|
|
// sequences longer than four bytes (e.g., OS X Snow Leopard).
|
2012-12-20 20:25:35 +00:00
|
|
|
use_encode_direct = true;
|
2020-09-07 23:03:22 +00:00
|
|
|
#endif
|
2016-10-30 02:01:19 +00:00
|
|
|
} else if (sizeof(wchar_t) == 2 && //!OCLINT(constant if expression)
|
|
|
|
(in[in_pos] & 0xF8) == 0xF0) {
|
2016-05-23 02:00:13 +00:00
|
|
|
// Assume we are in a UTF-16 environment (e.g., Cygwin) using a UTF-8 encoding.
|
|
|
|
// The bits set check will be true for a four byte UTF-8 sequence that requires
|
2019-03-12 21:06:01 +00:00
|
|
|
// two UTF-16 chars. Something that doesn't work with our simple use of std::mbrtowc().
|
2016-05-23 02:00:13 +00:00
|
|
|
use_encode_direct = true;
|
2016-05-19 00:46:13 +00:00
|
|
|
} else {
|
2019-03-12 21:06:01 +00:00
|
|
|
ret = std::mbrtowc(&wc, &in[in_pos], in_len - in_pos, &state);
|
2016-05-23 02:00:13 +00:00
|
|
|
// Determine whether to encode this character with our crazy scheme.
|
2016-05-19 00:46:13 +00:00
|
|
|
if (wc >= ENCODE_DIRECT_BASE && wc < ENCODE_DIRECT_BASE + 256) {
|
|
|
|
use_encode_direct = true;
|
|
|
|
} else if (wc == INTERNAL_SEPARATOR) {
|
|
|
|
use_encode_direct = true;
|
2019-11-19 01:08:16 +00:00
|
|
|
} else if (ret == static_cast<size_t>(-2)) {
|
2016-05-19 00:46:13 +00:00
|
|
|
// Incomplete sequence.
|
|
|
|
use_encode_direct = true;
|
2019-11-19 01:08:16 +00:00
|
|
|
} else if (ret == static_cast<size_t>(-1)) {
|
2016-05-19 00:46:13 +00:00
|
|
|
// Invalid data.
|
|
|
|
use_encode_direct = true;
|
|
|
|
} else if (ret > in_len - in_pos) {
|
|
|
|
// Other error codes? Terrifying, should never happen.
|
|
|
|
use_encode_direct = true;
|
2016-10-30 02:01:19 +00:00
|
|
|
} else if (sizeof(wchar_t) == 2 && wc >= 0xD800 && //!OCLINT(constant if expression)
|
|
|
|
wc <= 0xDFFF) {
|
2016-05-23 02:00:13 +00:00
|
|
|
// If we get a surrogate pair char on a UTF-16 system (e.g., Cygwin) then
|
|
|
|
// it's guaranteed the UTF-8 decoding is wrong so use direct encoding.
|
|
|
|
use_encode_direct = true;
|
2016-05-19 00:46:13 +00:00
|
|
|
}
|
2012-12-20 20:25:35 +00:00
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
if (use_encode_direct) {
|
2019-11-19 01:08:16 +00:00
|
|
|
wc = ENCODE_DIRECT_BASE + static_cast<unsigned char>(in[in_pos]);
|
2012-12-20 20:25:35 +00:00
|
|
|
result.push_back(wc);
|
2012-11-19 00:30:30 +00:00
|
|
|
in_pos++;
|
2019-03-12 22:07:07 +00:00
|
|
|
std::memset(&state, 0, sizeof state);
|
2016-10-30 02:01:19 +00:00
|
|
|
} else if (ret == 0) { // embedded null byte!
|
2012-12-20 20:25:35 +00:00
|
|
|
result.push_back(L'\0');
|
|
|
|
in_pos++;
|
2019-03-12 22:07:07 +00:00
|
|
|
std::memset(&state, 0, sizeof state);
|
2016-10-30 02:01:19 +00:00
|
|
|
} else { // normal case
|
2012-12-20 20:25:35 +00:00
|
|
|
result.push_back(wc);
|
|
|
|
in_pos += ret;
|
2012-11-19 00:30:30 +00:00
|
|
|
}
|
|
|
|
}
|
2016-10-30 02:01:19 +00:00
|
|
|
|
2012-12-20 20:25:35 +00:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Convert the first \p len bytes at \p in to a wide string. Embedded nulls are handled.
wcstring str2wcstring(const char *in, size_t len) {
    return str2wcs_internal(in, len);
}

/// Convert the nul-terminated string \p in to a wide string.
wcstring str2wcstring(const char *in) {
    const size_t len = std::strlen(in);
    return str2wcs_internal(in, len);
}

/// Convert all of \p in to a wide string. Handles embedded nulls!
wcstring str2wcstring(const std::string &in) {
    return str2wcs_internal(in.data(), in.size());
}

/// Convert the first \p len bytes of \p in to a wide string. Handles embedded nulls!
/// \p len is passed through unchecked, so it must not exceed in.size().
wcstring str2wcstring(const std::string &in, size_t len) {
    const char *bytes = in.data();
    return str2wcs_internal(bytes, len);
}
|
|
|
|
|
2021-02-04 21:28:48 +00:00
|
|
|
std::string wcs2string(const wcstring &input) { return wcs2string(input.data(), input.size()); }
|
|
|
|
|
|
|
|
std::string wcs2string(const wchar_t *in, size_t len) {
|
2021-02-04 23:18:34 +00:00
|
|
|
if (len == 0) return std::string{};
|
2012-12-12 23:44:01 +00:00
|
|
|
std::string result;
|
2021-02-17 23:36:22 +00:00
|
|
|
wcs2string_appending(in, len, &result);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
void wcs2string_appending(const wchar_t *in, size_t len, std::string *receiver) {
|
|
|
|
assert(receiver && "Null receiver");
|
|
|
|
receiver->reserve(receiver->size() + len);
|
2021-02-04 21:28:48 +00:00
|
|
|
wcs2string_callback(in, len, [&](const char *buff, size_t bufflen) {
|
2021-02-17 23:36:22 +00:00
|
|
|
receiver->append(buff, bufflen);
|
2020-07-30 00:16:51 +00:00
|
|
|
return true;
|
|
|
|
});
|
2011-12-27 03:18:46 +00:00
|
|
|
}
|
|
|
|
|
2016-11-06 05:03:20 +00:00
|
|
|
/// Test if the character can be encoded using the current locale.
///
/// \return true unless std::wcrtomb() reports a conversion error for \p wc.
static bool can_be_encoded(wchar_t wc) {
    mbstate_t state = {};
    char scratch[MB_LEN_MAX];
    const size_t produced = std::wcrtomb(scratch, wc, &state);
    return produced != static_cast<size_t>(-1);
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Format the variadic arguments according to \p format and return the result as a new string.
wcstring format_string(const wchar_t *format, ...) {
    va_list args;
    va_start(args, format);
    wcstring formatted = vformat_string(format, args);
    va_end(args);
    return formatted;
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Format \p va_orig according to \p format and append the result to \p target.
///
/// errno is saved on entry and restored before returning. \p va_orig itself is never consumed;
/// each attempt works on a copy.
void append_formatv(wcstring &target, const wchar_t *format, va_list va_orig) {
    const int saved_err = errno;
    // As far as I know, there is no way to check if a vswprintf-call failed because of a badly
    // formatted string option or because the supplied destination string was too small. In
    // GLIBC, errno seems to be set to EINVAL either way.
    //
    // Because of this, on failure we keep doubling the buffer until its size reaches max_size,
    // at which point we conclude that the error was probably due to a badly formatted string
    // option and give up, appending an empty string. Make sure to null terminate the buffer
    // before that, though.
    const size_t max_size = (128 * 1024 * 1024);
    wchar_t static_buff[256];

    // Start with the stack buffer. `size` is always measured in bytes.
    wchar_t *buff = static_buff;
    size_t size = sizeof static_buff;

    for (;;) {
        // Try printing.
        va_list va;
        va_copy(va, va_orig);
        const int status = std::vswprintf(buff, size / sizeof(wchar_t), format, va);
        va_end(va);
        if (status >= 0) break;

        // Grow and retry, unless we have hit the limit.
        size *= 2;
        if (size >= max_size) {
            buff[0] = '\0';
            break;
        }
        // The stack buffer cannot be handed to realloc(); ask for fresh memory instead.
        buff = static_cast<wchar_t *>(realloc((buff == static_buff ? nullptr : buff), size));
        assert(buff != nullptr);
    }

    target.append(buff);

    if (buff != static_buff) {
        free(buff);
    }

    errno = saved_err;
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Format \p va_orig according to \p format and return the result as a new string.
/// This is append_formatv() applied to an initially empty string.
wcstring vformat_string(const wchar_t *format, va_list va_orig) {
    wcstring formatted;
    append_formatv(formatted, format, va_orig);
    return formatted;
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Format the variadic arguments according to \p format and append the result to \p str.
void append_format(wcstring &str, const wchar_t *format, ...) {
    va_list args;
    va_start(args, format);
    append_formatv(str, format, args);
    va_end(args);
}
|
2012-02-10 02:43:36 +00:00
|
|
|
|
2021-06-26 04:16:03 +00:00
|
|
|
/// Find the end of a quoted string.
///
/// Scanning starts at the character after \p pos. A backslash skips the character that follows
/// it.
///
/// \param pos the position of the opening quote.
/// \param quote the quote character to look for.
/// \return a pointer to the first unescaped \p quote, or, when \p quote is a double quote, to
/// the '$' of an unescaped "$(" if that comes first. Returns nullptr if the string ends before
/// either is found.
const wchar_t *quote_end(const wchar_t *pos, wchar_t quote) {
    for (const wchar_t *cur = pos + 1; *cur != L'\0'; cur++) {
        if (*cur == L'\\') {
            // Step over the escaped character; a trailing backslash means no closing quote.
            cur++;
            if (*cur == L'\0') return nullptr;
            continue;
        }
        if (*cur == quote) return cur;
        // Command substitutions also end a double quoted string. This is how we
        // support command substitutions inside double quotes.
        if (quote == L'"' && cur[0] == L'$' && cur[1] == L'(') return cur;
    }
    return nullptr;
}
|
|
|
|
|
2022-02-04 20:44:45 +00:00
|
|
|
/// Find the end of a comment.
///
/// The character at \p pos itself is always skipped.
///
/// \return a pointer to the first newline after \p pos, or to the terminating nul if there is
/// no newline.
const wchar_t *comment_end(const wchar_t *pos) {
    const wchar_t *cur = pos + 1;
    while (*cur != L'\0' && *cur != L'\n') {
        cur++;
    }
    return cur;
}
|
|
|
|
|
2016-06-04 02:05:13 +00:00
|
|
|
void fish_setlocale() {
|
2017-09-21 05:00:14 +00:00
|
|
|
// Use various Unicode symbols if they can be encoded using the current locale, else a simple
|
|
|
|
// ASCII char alternative. All of the can_be_encoded() invocations should return the same
|
|
|
|
// true/false value since the code points are in the BMP but we're going to be paranoid. This
|
|
|
|
// is also technically wrong if we're not in a Unicode locale but we expect (or hope)
|
|
|
|
// can_be_encoded() will return false in that case.
|
2018-03-09 20:40:35 +00:00
|
|
|
if (can_be_encoded(L'\u2026')) {
|
|
|
|
ellipsis_char = L'\u2026';
|
|
|
|
ellipsis_str = L"\u2026";
|
2018-09-28 02:28:39 +00:00
|
|
|
} else {
|
|
|
|
ellipsis_char = L'$'; // "horizontal ellipsis"
|
2018-03-09 20:40:35 +00:00
|
|
|
ellipsis_str = L"...";
|
|
|
|
}
|
2019-01-30 20:31:11 +00:00
|
|
|
|
2018-03-28 19:27:25 +00:00
|
|
|
if (is_windows_subsystem_for_linux()) {
|
2018-09-28 02:28:39 +00:00
|
|
|
// neither of \u23CE and \u25CF can be displayed in the default fonts on Windows, though
|
|
|
|
// they can be *encoded* just fine. Use alternative glyphs.
|
2019-05-05 10:09:25 +00:00
|
|
|
omitted_newline_str = L"\u00b6"; // "pilcrow"
|
2019-01-30 21:42:59 +00:00
|
|
|
omitted_newline_width = 1;
|
2019-05-05 10:09:25 +00:00
|
|
|
obfuscation_read_char = L'\u2022'; // "bullet"
|
2019-01-30 20:31:11 +00:00
|
|
|
} else if (is_console_session()) {
|
2019-01-30 21:42:59 +00:00
|
|
|
omitted_newline_str = L"^J";
|
|
|
|
omitted_newline_width = 2;
|
2019-01-30 20:31:11 +00:00
|
|
|
obfuscation_read_char = L'*';
|
2018-09-28 02:28:39 +00:00
|
|
|
} else {
|
2019-01-30 21:42:59 +00:00
|
|
|
if (can_be_encoded(L'\u23CE')) {
|
2021-04-02 06:07:25 +00:00
|
|
|
omitted_newline_str = L"\u23CE"; // "return symbol" (⏎)
|
2019-01-30 21:42:59 +00:00
|
|
|
omitted_newline_width = 1;
|
|
|
|
} else {
|
|
|
|
omitted_newline_str = L"^J";
|
|
|
|
omitted_newline_width = 2;
|
|
|
|
}
|
2018-03-28 19:27:25 +00:00
|
|
|
obfuscation_read_char = can_be_encoded(L'\u25CF') ? L'\u25CF' : L'#'; // "black circle"
|
|
|
|
}
|
2005-09-20 13:26:39 +00:00
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Call read(2), retrying for as long as it fails with EINTR.
///
/// \return whatever the final read() call returned: the number of bytes read, 0 at end of
/// file, or a negative value with errno set for any error other than EINTR.
long read_blocked(int fd, void *buf, size_t count) {
    for (;;) {
        const ssize_t amt = read(fd, buf, count);
        if (amt >= 0 || errno != EINTR) return amt;
    }
}
|
|
|
|
|
2017-01-07 07:51:49 +00:00
|
|
|
/// Loop a write request while failure is non-critical. Return -1 and set errno in case of critical
/// error.
///
/// Failures with EAGAIN or EINTR are retried immediately; short writes are continued from where
/// they stopped.
///
/// \return \p count once everything has been written, or -1 on any other error.
ssize_t write_loop(int fd, const char *buff, size_t count) {
    size_t written = 0;
    while (written < count) {
        const ssize_t amt = write(fd, buff + written, count - written);
        if (amt >= 0) {
            written += static_cast<size_t>(amt);
            continue;
        }
        if (errno != EAGAIN && errno != EINTR) {
            return -1;
        }
    }
    return static_cast<ssize_t>(written);
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Call read(2), retrying for as long as it fails with EAGAIN or EINTR.
///
/// \return whatever the final read() call returned: the number of bytes read, 0 at end of
/// file, or a negative value with errno set for any other error.
ssize_t read_loop(int fd, void *buff, size_t count) {
    for (;;) {
        const ssize_t amt = read(fd, buff, count);
        if (amt >= 0) return amt;
        if (errno != EAGAIN && errno != EINTR) return amt;
    }
}
|
|
|
|
|
2017-02-15 05:09:15 +00:00
|
|
|
/// Hack to not print error messages in the tests. Do not call this from functions in this module
|
2019-11-25 11:03:25 +00:00
|
|
|
/// like `debug()`. It is only intended to suppress diagnostic noise from testing things like the
|
2017-02-15 05:09:15 +00:00
|
|
|
/// fish parser where we expect a lot of diagnostic messages due to testing error conditions.
|
2016-12-03 21:27:50 +00:00
|
|
|
bool should_suppress_stderr_for_tests() {
|
2019-03-12 21:06:01 +00:00
|
|
|
return program_name && !std::wcscmp(program_name, TESTS_PROGRAM_NAME);
|
2016-12-03 21:27:50 +00:00
|
|
|
}
|
|
|
|
|
2019-02-04 00:06:10 +00:00
|
|
|
// Return |x| as an unsigned value. LLONG_MIN cannot be negated directly, so for negative inputs
// negate (x + 1), which is always representable, and add the missing 1 back in unsigned math.
static unsigned long long absolute_value(long long x) {
    if (x < 0) {
        const long long almost = -(x + 1);
        return static_cast<unsigned long long>(almost) + 1;
    }
    return static_cast<unsigned long long>(x);
}
|
|
|
|
|
|
|
|
/// Write the decimal representation of \p val, followed by a NUL terminator, into \p buff.
/// \param buff destination buffer.
/// \param size capacity of \p buff in characters; only checked by an assertion after writing.
/// \param val the value to format.
template <typename CharT>
static void format_safe_impl(CharT *buff, size_t size, unsigned long long val) {
    size_t len = 0;
    // Emit digits least-significant first; the do/while guarantees a lone '0' for zero.
    do {
        buff[len++] = static_cast<CharT>((val % 10) + '0');
        val /= 10;
    } while (val != 0);
    // Digits were produced backwards, so flip them into reading order.
    std::reverse(buff, buff + len);
    buff[len++] = '\0';
    assert(len <= size && "Buffer overflowed");
}
|
|
|
|
|
2019-02-04 00:06:10 +00:00
|
|
|
void format_long_safe(char buff[64], long val) {
|
|
|
|
unsigned long long uval = absolute_value(val);
|
|
|
|
if (val >= 0) {
|
|
|
|
format_safe_impl(buff, 64, uval);
|
2016-04-27 23:10:14 +00:00
|
|
|
} else {
|
2019-02-04 00:06:10 +00:00
|
|
|
buff[0] = '-';
|
|
|
|
format_safe_impl(buff + 1, 63, uval);
|
|
|
|
}
|
|
|
|
}
|
2012-03-03 23:20:30 +00:00
|
|
|
|
2019-02-04 00:06:10 +00:00
|
|
|
void format_long_safe(wchar_t buff[64], long val) {
|
|
|
|
unsigned long long uval = absolute_value(val);
|
|
|
|
if (val >= 0) {
|
|
|
|
format_safe_impl(buff, 64, uval);
|
|
|
|
} else {
|
|
|
|
buff[0] = '-';
|
|
|
|
format_safe_impl(buff + 1, 63, uval);
|
2012-03-03 23:20:30 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-08-04 00:32:38 +00:00
|
|
|
void format_llong_safe(wchar_t buff[64], long long val) {
|
|
|
|
unsigned long long uval = absolute_value(val);
|
|
|
|
if (val >= 0) {
|
|
|
|
format_safe_impl(buff, 64, uval);
|
|
|
|
} else {
|
|
|
|
buff[0] = '-';
|
|
|
|
format_safe_impl(buff + 1, 63, uval);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-04 00:06:10 +00:00
|
|
|
void format_ullong_safe(wchar_t buff[64], unsigned long long val) {
|
|
|
|
return format_safe_impl(buff, 64, val);
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Copy the characters of the wide string \p s whose value is at most 127 into \p buff, dropping
/// all others. At most 63 characters are copied so the result always fits, NUL-terminated, in the
/// 64-element buffer.
void narrow_string_safe(char buff[64], const wchar_t *s) {
    size_t out_len = 0;
    for (const wchar_t *cursor = s; *cursor != L'\0'; ++cursor) {
        if (*cursor > 127) continue;  // not representable as a 7-bit char, skip it
        buff[out_len++] = char(*cursor);
        // Stop once only the slot for the terminator is left.
        if (out_len == 63) break;
    }
    buff[out_len] = '\0';
}
|
|
|
|
|
2020-06-08 01:47:27 +00:00
|
|
|
/// Word-wrap \p msg to the terminal width given by \p termsize and return the result with a
/// trailing newline.
///
/// The message is split into tokens on space, newline, carriage return and tab. Tokens are joined
/// with single spaces and a newline is inserted before a token that would not fit on the current
/// line. A token wider than one line is broken into pieces, each followed by "-\n". If the
/// terminal width is 0 the message is appended unwrapped.
wcstring reformat_for_screen(const wcstring &msg, const termsize_t &termsize) {
    wcstring buff;

    int screen_width = termsize.width;

    if (screen_width) {
        const wchar_t *start = msg.c_str();
        const wchar_t *pos = start;
        int line_width = 0;
        // Test for the terminator before touching anything. Checking only at the bottom of the
        // loop let an empty message advance `pos` past its terminator and then dereference it.
        while (*pos) {
            bool overflow = false;
            int tok_width = 0;

            // Tokenize on whitespace, and also calculate the width of the token.
            while (*pos && (!std::wcschr(L" \n\r\t", *pos))) {
                const int char_width = fish_wcwidth(*pos);
                // Check if the token is wider than one line. If so we mark it as an overflow and
                // break the token.
                if ((tok_width + char_width) > (screen_width - 1)) {
                    overflow = true;
                    break;
                }

                tok_width += char_width;
                pos++;
            }

            if (pos == start) {
                // Zero-length token: nothing to print, just step over the current character.
                // *pos is known to be non-NUL here because of the loop condition.
                pos = pos + 1;
            } else if (overflow) {
                // In case of overflow, we print a newline, except if we already are at position 0.
                wcstring token = msg.substr(start - msg.c_str(), pos - start);
                if (line_width != 0) buff.push_back(L'\n');
                buff.append(format_string(L"%ls-\n", token.c_str()));
                line_width = 0;
            } else {
                // Print the token, moving to a fresh line first if it does not fit on this one.
                wcstring token = msg.substr(start - msg.c_str(), pos - start);
                if ((line_width + (line_width != 0 ? 1 : 0) + tok_width) > screen_width) {
                    buff.push_back(L'\n');
                    line_width = 0;
                }
                buff.append(format_string(L"%ls%ls", line_width ? L" " : L"", token.c_str()));
                line_width += (line_width != 0 ? 1 : 0) + tok_width;
            }

            // The next token (if any) begins where this one stopped.
            start = pos;
        }
    } else {
        buff.append(msg);
    }
    buff.push_back(L'\n');
    return buff;
}
|
|
|
|
|
2017-06-21 04:55:16 +00:00
|
|
|
/// Escape a string in a fashion suitable for using as a URL. Store the result in out_str.
|
2018-01-08 09:52:29 +00:00
|
|
|
static void escape_string_url(const wcstring &in, wcstring &out) {
|
2018-11-07 11:37:47 +00:00
|
|
|
const std::string narrow = wcs2string(in);
|
|
|
|
for (auto &c1 : narrow) {
|
2017-06-21 04:55:16 +00:00
|
|
|
// This silliness is so we get the correct result whether chars are signed or unsigned.
|
2019-11-19 01:08:16 +00:00
|
|
|
unsigned int c2 = static_cast<unsigned int>(c1) & 0xFF;
|
2017-06-21 04:55:16 +00:00
|
|
|
if (!(c2 & 0x80) &&
|
|
|
|
(isalnum(c2) || c2 == '/' || c2 == '.' || c2 == '~' || c2 == '-' || c2 == '_')) {
|
|
|
|
// The above characters don't need to be encoded.
|
2019-11-19 01:08:16 +00:00
|
|
|
out.push_back(static_cast<wchar_t>(c2));
|
2017-06-21 04:55:16 +00:00
|
|
|
} else {
|
|
|
|
// All other chars need to have their UTF-8 representation encoded in hex.
|
|
|
|
wchar_t buf[4];
|
|
|
|
swprintf(buf, sizeof buf / sizeof buf[0], L"%%%02X", c2);
|
|
|
|
out.append(buf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2005-10-14 11:40:33 +00:00
|
|
|
|
2017-06-23 03:47:54 +00:00
|
|
|
/// Reverse the effects of `escape_string_url()`. By definition the string has consist of just ASCII
|
|
|
|
/// chars.
|
|
|
|
static bool unescape_string_url(const wchar_t *in, wcstring *out) {
|
|
|
|
std::string result;
|
|
|
|
result.reserve(out->size());
|
|
|
|
for (wchar_t c = *in; c; c = *++in) {
|
|
|
|
if (c > 0x7F) return false; // invalid character means we can't decode the string
|
|
|
|
if (c == '%') {
|
|
|
|
int c1 = in[1];
|
|
|
|
if (c1 == 0) return false; // found unexpected end of string
|
|
|
|
if (c1 == '%') {
|
|
|
|
result.push_back('%');
|
|
|
|
in++;
|
|
|
|
} else {
|
|
|
|
int c2 = in[2];
|
|
|
|
if (c2 == 0) return false; // string ended prematurely
|
|
|
|
long d1 = convert_digit(c1, 16);
|
|
|
|
if (d1 < 0) return false;
|
|
|
|
long d2 = convert_digit(c2, 16);
|
|
|
|
if (d2 < 0) return false;
|
|
|
|
result.push_back(16 * d1 + d2);
|
|
|
|
in += 2;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
result.push_back(c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*out = str2wcstring(result);
|
|
|
|
return true;
|
|
|
|
}
|
2017-06-21 04:55:16 +00:00
|
|
|
|
|
|
|
/// Escape a string in a fashion suitable for using as a fish var name. Store the result in out_str.
|
2018-01-08 09:52:29 +00:00
|
|
|
static void escape_string_var(const wcstring &in, wcstring &out) {
|
2017-06-21 04:55:16 +00:00
|
|
|
bool prev_was_hex_encoded = false;
|
2018-11-07 11:37:47 +00:00
|
|
|
const std::string narrow = wcs2string(in);
|
|
|
|
for (auto c1 : narrow) {
|
|
|
|
// This silliness is so we get the correct result whether chars are signed or unsigned.
|
2019-11-19 01:08:16 +00:00
|
|
|
unsigned int c2 = static_cast<unsigned int>(c1) & 0xFF;
|
2018-11-07 11:37:47 +00:00
|
|
|
if (!(c2 & 0x80) && isalnum(c2) && (!prev_was_hex_encoded || !is_hex_digit(c2))) {
|
2017-06-21 04:55:16 +00:00
|
|
|
// ASCII alphanumerics don't need to be encoded.
|
|
|
|
if (prev_was_hex_encoded) {
|
|
|
|
out.push_back(L'_');
|
|
|
|
prev_was_hex_encoded = false;
|
|
|
|
}
|
2019-11-19 01:08:16 +00:00
|
|
|
out.push_back(static_cast<wchar_t>(c2));
|
2018-11-07 11:37:47 +00:00
|
|
|
} else if (c2 == '_') {
|
2017-06-21 04:55:16 +00:00
|
|
|
// Underscores are encoded by doubling them.
|
|
|
|
out.append(L"__");
|
|
|
|
prev_was_hex_encoded = false;
|
|
|
|
} else {
|
|
|
|
// All other chars need to have their UTF-8 representation encoded in hex.
|
|
|
|
wchar_t buf[4];
|
2018-11-07 11:37:47 +00:00
|
|
|
swprintf(buf, sizeof buf / sizeof buf[0], L"_%02X", c2);
|
2017-06-21 04:55:16 +00:00
|
|
|
out.append(buf);
|
|
|
|
prev_was_hex_encoded = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (prev_was_hex_encoded) {
|
|
|
|
out.push_back(L'_');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-06-23 03:47:54 +00:00
|
|
|
/// Reverse the effects of `escape_string_var()`. By definition the string has consist of just ASCII
|
|
|
|
/// chars.
|
|
|
|
static bool unescape_string_var(const wchar_t *in, wcstring *out) {
|
|
|
|
std::string result;
|
|
|
|
result.reserve(out->size());
|
|
|
|
bool prev_was_hex_encoded = false;
|
|
|
|
for (wchar_t c = *in; c; c = *++in) {
|
|
|
|
if (c > 0x7F) return false; // invalid character means we can't decode the string
|
|
|
|
if (c == '_') {
|
|
|
|
int c1 = in[1];
|
|
|
|
if (c1 == 0) {
|
|
|
|
if (prev_was_hex_encoded) break;
|
|
|
|
return false; // found unexpected escape char at end of string
|
|
|
|
}
|
|
|
|
if (c1 == '_') {
|
|
|
|
result.push_back('_');
|
|
|
|
in++;
|
|
|
|
} else if (is_hex_digit(c1)) {
|
|
|
|
int c2 = in[2];
|
|
|
|
if (c2 == 0) return false; // string ended prematurely
|
|
|
|
long d1 = convert_hex_digit(c1);
|
|
|
|
if (d1 < 0) return false;
|
|
|
|
long d2 = convert_hex_digit(c2);
|
|
|
|
if (d2 < 0) return false;
|
|
|
|
result.push_back(16 * d1 + d2);
|
|
|
|
in += 2;
|
|
|
|
prev_was_hex_encoded = true;
|
|
|
|
}
|
|
|
|
// No "else" clause because if the first char after an underscore is not another
|
|
|
|
// underscore or a valid hex character then the underscore is there to improve
|
|
|
|
// readability after we've encoded a character not valid in a var name.
|
|
|
|
} else {
|
|
|
|
result.push_back(c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*out = str2wcstring(result);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-07-02 21:11:03 +00:00
|
|
|
/// Return a copy of \p in that is safe to place between double quotes: a backslash is inserted
/// in front of every backslash, dollar sign and double quote. All other characters are copied
/// unchanged.
wcstring escape_string_for_double_quotes(wcstring in) {
    wcstring result;
    result.reserve(in.size());
    for (const wchar_t c : in) {
        const bool needs_backslash = (c == L'\\' || c == L'$' || c == L'"');
        if (needs_backslash) {
            result.push_back(L'\\');
        }
        result.push_back(c);
    }
    return result;
}
|
|
|
|
|
2017-06-21 04:55:16 +00:00
|
|
|
/// Escape a string in a fashion suitable for using in fish script. Store the result in out_str.
|
|
|
|
static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring &out,
|
|
|
|
escape_flags_t flags) {
|
2014-01-08 23:06:09 +00:00
|
|
|
const wchar_t *in = orig_in;
|
2022-07-25 14:25:04 +00:00
|
|
|
const bool escape_printables = !(flags & ESCAPE_NO_PRINTABLES);
|
2018-04-24 22:53:30 +00:00
|
|
|
const bool no_quoted = static_cast<bool>(flags & ESCAPE_NO_QUOTED);
|
|
|
|
const bool no_tilde = static_cast<bool>(flags & ESCAPE_NO_TILDE);
|
2018-05-06 02:44:57 +00:00
|
|
|
const bool no_qmark = feature_test(features_t::qmark_noglob);
|
2022-07-25 17:18:53 +00:00
|
|
|
const bool symbolic = static_cast<bool>(flags & ESCAPE_SYMBOLIC) && (MB_CUR_MAX > 1);
|
|
|
|
assert((!symbolic || !escape_printables) && "symbolic implies escape-no-printables");
|
2005-10-14 11:40:33 +00:00
|
|
|
|
2020-09-24 15:21:49 +00:00
|
|
|
bool need_escape = false;
|
|
|
|
bool need_complex_escape = false;
|
2005-10-14 11:40:33 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
if (!no_quoted && in_len == 0) {
|
2014-01-08 23:06:09 +00:00
|
|
|
out.assign(L"''");
|
|
|
|
return;
|
2012-11-19 00:30:30 +00:00
|
|
|
}
|
2012-05-09 09:33:42 +00:00
|
|
|
|
2018-01-02 14:59:08 +00:00
|
|
|
for (size_t i = 0; i < in_len; i++) {
|
2016-04-27 23:10:14 +00:00
|
|
|
if ((*in >= ENCODE_DIRECT_BASE) && (*in < ENCODE_DIRECT_BASE + 256)) {
|
2012-11-19 00:30:30 +00:00
|
|
|
int val = *in - ENCODE_DIRECT_BASE;
|
|
|
|
int tmp;
|
2012-05-14 03:49:14 +00:00
|
|
|
|
2014-01-08 23:06:09 +00:00
|
|
|
out += L'\\';
|
|
|
|
out += L'X';
|
2012-02-27 04:11:34 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
tmp = val / 16;
|
|
|
|
out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
|
2006-10-19 11:50:23 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
tmp = val % 16;
|
|
|
|
out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
|
2020-09-24 15:21:49 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
2006-11-17 14:58:25 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
} else {
|
2012-11-19 00:30:30 +00:00
|
|
|
wchar_t c = *in;
|
2016-04-27 23:10:14 +00:00
|
|
|
switch (c) {
|
|
|
|
case L'\t': {
|
2022-07-25 17:18:53 +00:00
|
|
|
if (symbolic)
|
|
|
|
out += L'␉';
|
|
|
|
else
|
|
|
|
out += L"\\t";
|
2020-09-24 15:21:49 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
2012-11-19 08:31:03 +00:00
|
|
|
break;
|
2016-04-27 23:10:14 +00:00
|
|
|
}
|
|
|
|
case L'\n': {
|
2022-07-25 17:18:53 +00:00
|
|
|
if (symbolic)
|
|
|
|
out += L'';
|
|
|
|
else
|
|
|
|
out += L"\\n";
|
2020-09-24 15:21:49 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
2012-11-19 08:31:03 +00:00
|
|
|
break;
|
2016-04-27 23:10:14 +00:00
|
|
|
}
|
|
|
|
case L'\b': {
|
2022-07-25 17:18:53 +00:00
|
|
|
if (symbolic)
|
|
|
|
out += L'␈';
|
|
|
|
else
|
|
|
|
out += L"\\b";
|
2020-09-24 15:21:49 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
2012-11-19 08:31:03 +00:00
|
|
|
break;
|
2016-04-27 23:10:14 +00:00
|
|
|
}
|
|
|
|
case L'\r': {
|
2022-07-25 17:18:53 +00:00
|
|
|
if (symbolic)
|
|
|
|
out += L'␍';
|
|
|
|
else
|
|
|
|
out += L"\\r";
|
2020-09-24 15:21:49 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
2012-11-19 08:31:03 +00:00
|
|
|
break;
|
2016-04-27 23:10:14 +00:00
|
|
|
}
|
2018-06-18 05:01:32 +00:00
|
|
|
case L'\x1B': {
|
2022-07-25 17:18:53 +00:00
|
|
|
if (symbolic)
|
|
|
|
out += L'␛';
|
|
|
|
else
|
|
|
|
out += L"\\e";
|
2020-09-24 15:21:49 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
2021-01-16 11:49:49 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case L'\x7F': {
|
2022-07-25 17:18:53 +00:00
|
|
|
if (symbolic)
|
|
|
|
out += L'␡';
|
|
|
|
else
|
|
|
|
out += L"\\x7f";
|
2021-01-16 11:49:49 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
2012-11-19 08:31:03 +00:00
|
|
|
break;
|
2016-04-27 23:10:14 +00:00
|
|
|
}
|
2012-11-19 08:31:03 +00:00
|
|
|
case L'\\':
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'\'': {
|
2020-09-24 15:21:49 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
2022-07-25 17:18:53 +00:00
|
|
|
if (escape_printables || (c == L'\\' && !symbolic)) out += L'\\';
|
2014-01-08 23:06:09 +00:00
|
|
|
out += *in;
|
2012-11-19 08:31:03 +00:00
|
|
|
break;
|
2012-11-19 00:30:30 +00:00
|
|
|
}
|
2018-05-06 02:11:57 +00:00
|
|
|
case ANY_CHAR: {
|
|
|
|
// See #1614
|
|
|
|
out += L'?';
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case ANY_STRING: {
|
2014-08-17 02:25:36 +00:00
|
|
|
out += L'*';
|
|
|
|
break;
|
2016-04-27 23:10:14 +00:00
|
|
|
}
|
|
|
|
case ANY_STRING_RECURSIVE: {
|
2014-08-17 02:25:36 +00:00
|
|
|
out += L"**";
|
|
|
|
break;
|
2016-04-27 23:10:14 +00:00
|
|
|
}
|
2018-04-24 22:53:30 +00:00
|
|
|
|
2012-11-19 08:31:03 +00:00
|
|
|
case L'&':
|
|
|
|
case L'$':
|
|
|
|
case L' ':
|
|
|
|
case L'#':
|
|
|
|
case L'<':
|
|
|
|
case L'>':
|
|
|
|
case L'(':
|
|
|
|
case L')':
|
|
|
|
case L'[':
|
|
|
|
case L']':
|
|
|
|
case L'{':
|
|
|
|
case L'}':
|
2018-05-06 02:11:57 +00:00
|
|
|
case L'?':
|
2012-11-19 08:31:03 +00:00
|
|
|
case L'*':
|
|
|
|
case L'|':
|
|
|
|
case L';':
|
|
|
|
case L'"':
|
2018-10-10 21:26:29 +00:00
|
|
|
case L'%':
|
2019-09-19 06:32:40 +00:00
|
|
|
case L'~': {
|
2022-05-30 23:14:32 +00:00
|
|
|
bool char_is_normal = (c == L'~' && no_tilde) || (c == L'?' && no_qmark);
|
2018-04-24 22:53:30 +00:00
|
|
|
if (!char_is_normal) {
|
2020-09-24 15:21:49 +00:00
|
|
|
need_escape = true;
|
2022-07-25 14:25:04 +00:00
|
|
|
if (escape_printables) out += L'\\';
|
2012-11-19 08:31:03 +00:00
|
|
|
}
|
2014-01-08 23:06:09 +00:00
|
|
|
out += *in;
|
2012-11-19 08:31:03 +00:00
|
|
|
break;
|
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
default: {
|
2022-07-25 21:54:00 +00:00
|
|
|
if (*in >= 0 && *in < 32) {
|
2022-07-25 17:17:26 +00:00
|
|
|
need_escape = need_complex_escape = true;
|
|
|
|
|
2022-07-25 17:18:53 +00:00
|
|
|
if (symbolic) {
|
|
|
|
out += L'\u2400' + *in;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2022-07-25 21:54:00 +00:00
|
|
|
if (*in < 27 && *in != 0) {
|
2014-01-08 23:06:09 +00:00
|
|
|
out += L'\\';
|
|
|
|
out += L'c';
|
2016-04-27 23:10:14 +00:00
|
|
|
out += L'a' + *in - 1;
|
2012-11-19 08:31:03 +00:00
|
|
|
break;
|
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
int tmp = (*in) % 16;
|
2014-01-08 23:06:09 +00:00
|
|
|
out += L'\\';
|
|
|
|
out += L'x';
|
2016-04-27 23:10:14 +00:00
|
|
|
out += ((*in > 15) ? L'1' : L'0');
|
|
|
|
out += tmp > 9 ? L'a' + (tmp - 10) : L'0' + tmp;
|
|
|
|
} else {
|
2014-01-08 23:06:09 +00:00
|
|
|
out += *in;
|
2012-11-19 08:31:03 +00:00
|
|
|
}
|
|
|
|
break;
|
2012-11-19 00:30:30 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
in++;
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
// Use quoted escaping if possible, since most people find it easier to read.
|
2022-07-25 14:25:04 +00:00
|
|
|
if (!no_quoted && need_escape && !need_complex_escape && escape_printables) {
|
2014-01-08 23:06:09 +00:00
|
|
|
wchar_t single_quote = L'\'';
|
|
|
|
out.clear();
|
|
|
|
out.reserve(2 + in_len);
|
|
|
|
out.push_back(single_quote);
|
|
|
|
out.append(orig_in, in_len);
|
|
|
|
out.push_back(single_quote);
|
2012-11-19 00:30:30 +00:00
|
|
|
}
|
2014-01-08 23:06:09 +00:00
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2018-11-15 05:30:11 +00:00
|
|
|
/// Escapes a string for use in a regex string. Not safe for use with `eval` as only
|
2022-01-07 18:35:21 +00:00
|
|
|
/// characters reserved by PCRE2 are escaped.
|
2018-11-15 05:30:11 +00:00
|
|
|
/// \param in is the raw string to be searched for literally when substituted in a PCRE2 expression.
|
|
|
|
static wcstring escape_string_pcre2(const wcstring &in) {
|
|
|
|
wcstring out;
|
2019-05-05 10:09:25 +00:00
|
|
|
out.reserve(in.size() * 1.3); // a wild guess
|
2018-11-15 05:30:11 +00:00
|
|
|
|
|
|
|
for (auto c : in) {
|
|
|
|
switch (c) {
|
|
|
|
case L'.':
|
|
|
|
case L'^':
|
|
|
|
case L'$':
|
|
|
|
case L'*':
|
|
|
|
case L'+':
|
|
|
|
case L'(':
|
|
|
|
case L')':
|
|
|
|
case L'?':
|
|
|
|
case L'[':
|
|
|
|
case L'{':
|
|
|
|
case L'}':
|
|
|
|
case L'\\':
|
|
|
|
case L'|':
|
2019-05-05 10:09:25 +00:00
|
|
|
// these two only *need* to be escaped within a character class, and technically it
|
|
|
|
// makes no sense to ever use process substitution output to compose a character class,
|
|
|
|
// but...
|
2018-11-15 05:30:11 +00:00
|
|
|
case L'-':
|
|
|
|
case L']':
|
|
|
|
out.push_back('\\');
|
2022-10-27 03:58:36 +00:00
|
|
|
__fallthrough__
|
2018-11-15 05:30:11 +00:00
|
|
|
default:
|
|
|
|
out.push_back(c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
2017-06-21 04:55:16 +00:00
|
|
|
/// Escape the NUL-terminated string \p in according to \p style and return the result.
/// \p flags is only consulted for STRING_STYLE_SCRIPT.
wcstring escape_string(const wchar_t *in, escape_flags_t flags, escape_string_style_t style) {
    wcstring escaped;

    switch (style) {
        case STRING_STYLE_SCRIPT:
            escape_string_script(in, std::wcslen(in), escaped, flags);
            break;
        case STRING_STYLE_URL:
            escape_string_url(in, escaped);
            break;
        case STRING_STYLE_VAR:
            escape_string_var(in, escaped);
            break;
        case STRING_STYLE_REGEX:
            escaped = escape_string_pcre2(in);
            break;
    }

    return escaped;
}
|
|
|
|
|
2017-06-21 04:55:16 +00:00
|
|
|
/// Escape the string \p in according to \p style and return the result.
/// \p flags is only consulted for STRING_STYLE_SCRIPT.
wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_style_t style) {
    wcstring escaped;

    switch (style) {
        case STRING_STYLE_SCRIPT:
            escape_string_script(in.c_str(), in.size(), escaped, flags);
            break;
        case STRING_STYLE_URL:
            escape_string_url(in, escaped);
            break;
        case STRING_STYLE_VAR:
            escape_string_var(in, escaped);
            break;
        case STRING_STYLE_REGEX:
            escaped = escape_string_pcre2(in);
            break;
    }

    return escaped;
}
|
|
|
|
|
2019-09-14 18:56:02 +00:00
|
|
|
/// Helper to return the last character in a string, or none.
|
|
|
|
static maybe_t<wchar_t> string_last_char(const wcstring &str) {
|
|
|
|
if (str.empty()) return none();
|
|
|
|
return str.back();
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Given a null terminated string starting with a backslash, read the escape as if it is unquoted,
|
2021-05-05 23:23:06 +00:00
|
|
|
/// appending to result. Return the number of characters consumed, or none on error.
|
2018-09-28 02:22:55 +00:00
|
|
|
maybe_t<size_t> read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete,
|
|
|
|
bool unescape_special) {
|
|
|
|
assert(input[0] == L'\\' && "Not an escape");
|
2013-11-25 06:57:49 +00:00
|
|
|
|
2019-09-14 19:54:38 +00:00
|
|
|
// Here's the character we'll ultimately append, or none. Note that L'\0' is a
|
2016-04-27 23:10:14 +00:00
|
|
|
// valid thing to append.
|
2019-09-14 19:54:38 +00:00
|
|
|
maybe_t<wchar_t> result_char_or_none = none();
|
2013-11-25 06:57:49 +00:00
|
|
|
|
|
|
|
bool errored = false;
|
2016-04-27 23:10:14 +00:00
|
|
|
size_t in_pos = 1; // in_pos always tracks the next character to read (and therefore the number
|
|
|
|
// of characters read so far)
|
2013-11-25 06:57:49 +00:00
|
|
|
|
2022-09-29 14:53:16 +00:00
|
|
|
// For multibyte \X sequences.
|
|
|
|
std::string byte_buff;
|
2022-10-06 02:56:09 +00:00
|
|
|
while (true) {
|
2022-09-29 14:53:16 +00:00
|
|
|
const wchar_t c = input[in_pos++];
|
|
|
|
switch (c) {
|
|
|
|
// A null character after a backslash is an error.
|
|
|
|
case L'\0': {
|
|
|
|
// Adjust in_pos to only include the backslash.
|
|
|
|
assert(in_pos > 0);
|
|
|
|
in_pos--;
|
|
|
|
|
|
|
|
// It's an error, unless we're allowing incomplete escapes.
|
|
|
|
if (!allow_incomplete) errored = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// Numeric escape sequences. No prefix means octal escape, otherwise hexadecimal.
|
|
|
|
case L'0':
|
|
|
|
case L'1':
|
|
|
|
case L'2':
|
|
|
|
case L'3':
|
|
|
|
case L'4':
|
|
|
|
case L'5':
|
|
|
|
case L'6':
|
|
|
|
case L'7':
|
|
|
|
case L'u':
|
|
|
|
case L'U':
|
|
|
|
case L'x':
|
|
|
|
case L'X': {
|
|
|
|
long long res = 0;
|
|
|
|
size_t chars = 2;
|
|
|
|
int base = 16;
|
|
|
|
bool byte_literal = false;
|
|
|
|
wchar_t max_val = ASCII_MAX;
|
|
|
|
|
|
|
|
switch (c) {
|
|
|
|
case L'u': {
|
|
|
|
chars = 4;
|
|
|
|
max_val = UCS2_MAX;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case L'U': {
|
|
|
|
chars = 8;
|
|
|
|
max_val = WCHAR_MAX;
|
2014-01-15 09:40:40 +00:00
|
|
|
|
2022-09-29 14:53:16 +00:00
|
|
|
// Don't exceed the largest Unicode code point - see #1107.
|
|
|
|
if (0x10FFFF < max_val) max_val = static_cast<wchar_t>(0x10FFFF);
|
|
|
|
break;
|
|
|
|
}
|
2022-09-29 17:27:18 +00:00
|
|
|
case L'x':
|
2022-09-29 14:53:16 +00:00
|
|
|
case L'X': {
|
|
|
|
byte_literal = true;
|
|
|
|
max_val = BYTE_MAX;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default: {
|
|
|
|
base = 8;
|
|
|
|
chars = 3;
|
|
|
|
// Note that in_pos currently is just after the first post-backslash
|
|
|
|
// character; we want to start our escape from there.
|
|
|
|
assert(in_pos > 0);
|
|
|
|
in_pos--;
|
|
|
|
break;
|
|
|
|
}
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2022-09-29 14:53:16 +00:00
|
|
|
|
|
|
|
for (size_t i = 0; i < chars; i++) {
|
|
|
|
long d = convert_digit(input[in_pos], base);
|
|
|
|
if (d < 0) {
|
|
|
|
// If we have no digit, this is a tokenizer error.
|
|
|
|
if (i == 0) errored = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
res = (res * base) + d;
|
|
|
|
in_pos++;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
|
2022-09-29 14:53:16 +00:00
|
|
|
if (!errored && res <= max_val) {
|
|
|
|
if (byte_literal) {
|
|
|
|
// Multibyte encodings necessitate that we keep adjacent byte escapes.
|
|
|
|
// - `\Xc3\Xb6` is "ö", but only together.
|
|
|
|
// (this assumes a valid codepoint can't consist of multiple bytes
|
|
|
|
// that are valid on their own, which is true for UTF-8)
|
|
|
|
byte_buff.push_back(static_cast<char>(res));
|
|
|
|
result_char_or_none = none();
|
2022-09-29 17:27:18 +00:00
|
|
|
if (input[in_pos] == L'\\'
|
|
|
|
&& (input[in_pos + 1] == L'X' || input[in_pos + 1] == L'x')) {
|
2022-09-29 14:53:16 +00:00
|
|
|
in_pos++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
result_char_or_none = static_cast<wchar_t>(res);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
errored = true;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
|
2022-09-29 14:53:16 +00:00
|
|
|
break;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2022-09-29 14:53:16 +00:00
|
|
|
// \a means bell (alert).
|
|
|
|
case L'a': {
|
|
|
|
result_char_or_none = L'\a';
|
|
|
|
break;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2022-09-29 14:53:16 +00:00
|
|
|
// \b means backspace.
|
|
|
|
case L'b': {
|
|
|
|
result_char_or_none = L'\b';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// \cX means control sequence X.
|
|
|
|
case L'c': {
|
|
|
|
const wchar_t sequence_char = input[in_pos++];
|
|
|
|
if (sequence_char >= L'a' && sequence_char <= (L'a' + 32)) {
|
|
|
|
result_char_or_none = sequence_char - L'a' + 1;
|
|
|
|
} else if (sequence_char >= L'A' && sequence_char <= (L'A' + 32)) {
|
|
|
|
result_char_or_none = sequence_char - L'A' + 1;
|
|
|
|
} else {
|
|
|
|
errored = true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// \x1B means escape.
|
|
|
|
case L'e': {
|
|
|
|
result_char_or_none = L'\x1B';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// \f means form feed.
|
|
|
|
case L'f': {
|
|
|
|
result_char_or_none = L'\f';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// \n means newline.
|
|
|
|
case L'n': {
|
|
|
|
result_char_or_none = L'\n';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// \r means carriage return.
|
|
|
|
case L'r': {
|
|
|
|
result_char_or_none = L'\r';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// \t means tab.
|
|
|
|
case L't': {
|
|
|
|
result_char_or_none = L'\t';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// \v means vertical tab.
|
|
|
|
case L'v': {
|
|
|
|
result_char_or_none = L'\v';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
// If a backslash is followed by an actual newline, swallow them both.
|
|
|
|
case L'\n': {
|
|
|
|
result_char_or_none = none();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
default: {
|
|
|
|
if (unescape_special) result->push_back(INTERNAL_SEPARATOR);
|
|
|
|
result_char_or_none = c;
|
|
|
|
break;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
}
|
2022-09-29 14:53:16 +00:00
|
|
|
|
|
|
|
if (errored) return none();
|
|
|
|
|
|
|
|
if (!byte_buff.empty()) {
|
|
|
|
result->append(str2wcstring(byte_buff));
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2022-09-29 14:53:16 +00:00
|
|
|
|
|
|
|
break;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
|
2022-09-29 14:53:16 +00:00
|
|
|
if (result_char_or_none.has_value()) {
|
2019-09-14 19:54:38 +00:00
|
|
|
result->push_back(*result_char_or_none);
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2018-09-28 02:22:55 +00:00
|
|
|
|
|
|
|
return in_pos;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Returns the unescaped version of input_str into output_str (by reference). Returns true if
|
2021-11-27 20:46:15 +00:00
|
|
|
/// successful. If false, the contents of output_str are unchanged.
|
2016-04-27 23:10:14 +00:00
|
|
|
static bool unescape_string_internal(const wchar_t *const input, const size_t input_len,
|
|
|
|
wcstring *output_str, unescape_flags_t flags) {
|
|
|
|
// Set up result string, which we'll swap with the output on success.
|
2013-11-25 06:57:49 +00:00
|
|
|
wcstring result;
|
|
|
|
result.reserve(input_len);
|
|
|
|
|
2016-10-21 04:14:40 +00:00
|
|
|
const bool unescape_special = static_cast<bool>(flags & UNESCAPE_SPECIAL);
|
|
|
|
const bool allow_incomplete = static_cast<bool>(flags & UNESCAPE_INCOMPLETE);
|
2020-03-08 13:03:15 +00:00
|
|
|
const bool ignore_backslashes = static_cast<bool>(flags & UNESCAPE_NO_BACKSLASHES);
|
2013-11-25 06:57:49 +00:00
|
|
|
|
2019-05-18 18:31:41 +00:00
|
|
|
// The positions of open braces.
|
|
|
|
std::vector<size_t> braces;
|
|
|
|
// The positions of variable expansions or brace ","s.
|
|
|
|
// We only read braces as expanders if there's a variable expansion or "," in them.
|
|
|
|
std::vector<size_t> vars_or_seps;
|
2018-03-12 00:51:54 +00:00
|
|
|
int brace_count = 0;
|
2013-11-25 06:57:49 +00:00
|
|
|
|
|
|
|
bool errored = false;
|
2018-09-28 02:28:39 +00:00
|
|
|
enum {
|
|
|
|
mode_unquoted,
|
|
|
|
mode_single_quotes,
|
|
|
|
mode_double_quotes,
|
|
|
|
} mode = mode_unquoted;
|
2013-11-25 06:57:49 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
for (size_t input_position = 0; input_position < input_len && !errored; input_position++) {
|
2013-11-25 06:57:49 +00:00
|
|
|
const wchar_t c = input[input_position];
|
2019-09-14 20:17:22 +00:00
|
|
|
// Here's the character we'll append to result, or none() to suppress it.
|
|
|
|
maybe_t<wchar_t> to_append_or_none = c;
|
2016-04-27 23:10:14 +00:00
|
|
|
if (mode == mode_unquoted) {
|
|
|
|
switch (c) {
|
|
|
|
case L'\\': {
|
2020-03-08 13:03:15 +00:00
|
|
|
if (!ignore_backslashes) {
|
2020-03-26 19:45:40 +00:00
|
|
|
// Backslashes (escapes) are complicated and may result in errors, or
|
|
|
|
// appending INTERNAL_SEPARATORs, so we have to handle them specially.
|
|
|
|
auto escape_chars = read_unquoted_escape(
|
|
|
|
input + input_position, &result, allow_incomplete, unescape_special);
|
2022-10-08 16:56:38 +00:00
|
|
|
if (!escape_chars.has_value()) {
|
2020-03-08 13:03:15 +00:00
|
|
|
// A none() return indicates an error.
|
|
|
|
errored = true;
|
|
|
|
} else {
|
2020-03-26 19:45:40 +00:00
|
|
|
// Skip over the characters we read, minus one because the outer loop
|
|
|
|
// will increment it.
|
2020-03-08 13:03:15 +00:00
|
|
|
assert(*escape_chars > 0);
|
|
|
|
input_position += *escape_chars - 1;
|
|
|
|
}
|
|
|
|
// We've already appended, don't append anything else.
|
|
|
|
to_append_or_none = none();
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'~': {
|
|
|
|
if (unescape_special && (input_position == 0)) {
|
2014-05-14 05:30:41 +00:00
|
|
|
to_append_or_none = HOME_DIRECTORY;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2018-10-10 21:26:29 +00:00
|
|
|
case L'%': {
|
|
|
|
// Note that this only recognizes %self if the string is literally %self.
|
|
|
|
// %self/foo will NOT match this.
|
|
|
|
if (unescape_special && input_position == 0 &&
|
2019-03-12 21:06:01 +00:00
|
|
|
!std::wcscmp(input, PROCESS_EXPAND_SELF_STR)) {
|
2018-10-10 21:26:29 +00:00
|
|
|
to_append_or_none = PROCESS_EXPAND_SELF;
|
2022-09-20 18:58:37 +00:00
|
|
|
input_position += PROCESS_EXPAND_SELF_STR_LEN - 1; // skip over 'self's
|
2018-10-10 21:26:29 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'*': {
|
|
|
|
if (unescape_special) {
|
|
|
|
// In general, this is ANY_STRING. But as a hack, if the last appended char
|
|
|
|
// is ANY_STRING, delete the last char and store ANY_STRING_RECURSIVE to
|
|
|
|
// reflect the fact that ** is the recursive wildcard.
|
|
|
|
if (string_last_char(result) == ANY_STRING) {
|
2019-11-19 00:56:46 +00:00
|
|
|
assert(!result.empty());
|
2013-11-25 06:57:49 +00:00
|
|
|
result.resize(result.size() - 1);
|
2014-05-14 05:30:41 +00:00
|
|
|
to_append_or_none = ANY_STRING_RECURSIVE;
|
2016-04-27 23:10:14 +00:00
|
|
|
} else {
|
2014-05-14 05:30:41 +00:00
|
|
|
to_append_or_none = ANY_STRING;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2018-05-06 02:11:57 +00:00
|
|
|
case L'?': {
|
2018-05-06 02:44:57 +00:00
|
|
|
if (unescape_special && !feature_test(features_t::qmark_noglob)) {
|
2018-05-06 02:11:57 +00:00
|
|
|
to_append_or_none = ANY_CHAR;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'$': {
|
|
|
|
if (unescape_special) {
|
2021-07-02 21:11:03 +00:00
|
|
|
bool is_cmdsub =
|
|
|
|
input_position + 1 < input_len && input[input_position + 1] == L'(';
|
|
|
|
if (!is_cmdsub) {
|
|
|
|
to_append_or_none = VARIABLE_EXPAND;
|
|
|
|
vars_or_seps.push_back(input_position);
|
|
|
|
}
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'{': {
|
|
|
|
if (unescape_special) {
|
2018-03-12 00:51:54 +00:00
|
|
|
brace_count++;
|
2018-03-10 19:16:07 +00:00
|
|
|
to_append_or_none = BRACE_BEGIN;
|
2019-09-14 20:18:04 +00:00
|
|
|
// We need to store where the brace *ends up* in the output.
|
2019-05-18 18:31:41 +00:00
|
|
|
braces.push_back(result.size());
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'}': {
|
|
|
|
if (unescape_special) {
|
2018-05-02 14:33:28 +00:00
|
|
|
// HACK: The completion machinery sometimes hands us partial tokens.
|
|
|
|
// We can't parse them properly, but it shouldn't hurt,
|
|
|
|
// so we don't assert here.
|
|
|
|
// See #4954.
|
2018-09-28 02:28:39 +00:00
|
|
|
// assert(brace_count > 0 && "imbalanced brackets are a tokenizer error, we
|
|
|
|
// shouldn't be able to get here");
|
2018-03-12 00:51:54 +00:00
|
|
|
brace_count--;
|
2018-03-10 19:16:07 +00:00
|
|
|
to_append_or_none = BRACE_END;
|
2019-11-19 00:56:46 +00:00
|
|
|
if (!braces.empty()) {
|
2022-11-16 20:10:01 +00:00
|
|
|
// HACK: To reduce accidental use of brace expansion, treat a brace
|
|
|
|
// with zero or one items as literal input. See #4632. (The hack is
|
|
|
|
// doing it here and like this.)
|
2019-11-19 00:56:46 +00:00
|
|
|
if (vars_or_seps.empty() || vars_or_seps.back() < braces.back()) {
|
2019-05-18 18:31:41 +00:00
|
|
|
result[braces.back()] = L'{';
|
|
|
|
// We also need to turn all spaces back.
|
|
|
|
for (size_t i = braces.back() + 1; i < result.size(); i++) {
|
|
|
|
if (result[i] == BRACE_SPACE) result[i] = L' ';
|
|
|
|
}
|
|
|
|
to_append_or_none = L'}';
|
|
|
|
}
|
|
|
|
|
2019-05-28 02:47:13 +00:00
|
|
|
// Remove all seps inside the current brace pair, so if we have a
|
|
|
|
// surrounding pair we only get seps inside *that*.
|
2019-11-19 00:56:46 +00:00
|
|
|
if (!vars_or_seps.empty()) {
|
|
|
|
while (!vars_or_seps.empty() && vars_or_seps.back() > braces.back())
|
2019-05-28 02:47:13 +00:00
|
|
|
vars_or_seps.pop_back();
|
2019-05-18 18:31:41 +00:00
|
|
|
}
|
|
|
|
braces.pop_back();
|
|
|
|
}
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L',': {
|
2018-03-12 00:51:54 +00:00
|
|
|
if (unescape_special && brace_count > 0) {
|
2018-03-10 19:16:07 +00:00
|
|
|
to_append_or_none = BRACE_SEP;
|
2019-05-18 18:31:41 +00:00
|
|
|
vars_or_seps.push_back(input_position);
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2018-03-12 00:51:54 +00:00
|
|
|
case L' ': {
|
2018-03-12 03:02:43 +00:00
|
|
|
if (unescape_special && brace_count > 0) {
|
2020-02-03 01:51:04 +00:00
|
|
|
to_append_or_none = BRACE_SPACE;
|
2018-03-12 03:02:43 +00:00
|
|
|
}
|
2018-03-12 00:51:54 +00:00
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'\'': {
|
2013-11-25 06:57:49 +00:00
|
|
|
mode = mode_single_quotes;
|
2019-09-14 20:17:22 +00:00
|
|
|
to_append_or_none =
|
|
|
|
unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
|
2013-11-25 06:57:49 +00:00
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'\"': {
|
2013-11-25 06:57:49 +00:00
|
|
|
mode = mode_double_quotes;
|
2019-09-14 20:17:22 +00:00
|
|
|
to_append_or_none =
|
|
|
|
unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
|
2013-11-25 06:57:49 +00:00
|
|
|
break;
|
|
|
|
}
|
2018-03-12 03:02:43 +00:00
|
|
|
default: {
|
|
|
|
break;
|
|
|
|
}
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
} else if (mode == mode_single_quotes) {
|
|
|
|
if (c == L'\\') {
|
|
|
|
// A backslash may or may not escape something in single quotes.
|
|
|
|
switch (input[input_position + 1]) {
|
2013-11-25 06:57:49 +00:00
|
|
|
case '\\':
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'\'': {
|
2014-05-14 05:30:41 +00:00
|
|
|
to_append_or_none = input[input_position + 1];
|
2016-04-27 23:10:14 +00:00
|
|
|
input_position += 1; // skip over the backslash
|
2013-11-25 06:57:49 +00:00
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case L'\0': {
|
|
|
|
if (!allow_incomplete) {
|
2013-11-25 06:57:49 +00:00
|
|
|
errored = true;
|
2016-04-27 23:10:14 +00:00
|
|
|
} else {
|
|
|
|
// PCA this line had the following cryptic comment: 'We may ever escape
|
|
|
|
// a NULL character, but still appending a \ in case I am wrong.' Not
|
|
|
|
// sure what it means or the importance of this.
|
2013-11-25 06:57:49 +00:00
|
|
|
input_position += 1; /* Skip over the backslash */
|
2014-05-14 05:30:41 +00:00
|
|
|
to_append_or_none = L'\\';
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
break;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
default: {
|
|
|
|
// Literal backslash that doesn't escape anything! Leave things alone; we'll
|
|
|
|
// append the backslash itself.
|
2013-11-25 06:57:49 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
} else if (c == L'\'') {
|
2019-09-14 20:17:22 +00:00
|
|
|
to_append_or_none =
|
|
|
|
unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
|
2013-11-25 06:57:49 +00:00
|
|
|
mode = mode_unquoted;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
} else if (mode == mode_double_quotes) {
|
|
|
|
switch (c) {
|
|
|
|
case L'"': {
|
2013-11-25 06:57:49 +00:00
|
|
|
mode = mode_unquoted;
|
2019-09-14 20:17:22 +00:00
|
|
|
to_append_or_none =
|
|
|
|
unescape_special ? maybe_t<wchar_t>(INTERNAL_SEPARATOR) : none();
|
2013-11-25 06:57:49 +00:00
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case '\\': {
|
|
|
|
switch (input[input_position + 1]) {
|
|
|
|
case L'\0': {
|
|
|
|
if (!allow_incomplete) {
|
2013-11-25 06:57:49 +00:00
|
|
|
errored = true;
|
2016-04-27 23:10:14 +00:00
|
|
|
} else {
|
2014-05-14 05:30:41 +00:00
|
|
|
to_append_or_none = L'\0';
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
break;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
case '\\':
|
|
|
|
case L'$':
|
2016-04-27 23:10:14 +00:00
|
|
|
case '"': {
|
2014-05-14 05:30:41 +00:00
|
|
|
to_append_or_none = input[input_position + 1];
|
2013-11-25 06:57:49 +00:00
|
|
|
input_position += 1; /* Skip over the backslash */
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case '\n': {
|
2013-11-25 06:57:49 +00:00
|
|
|
/* Swallow newline */
|
2019-09-14 20:17:22 +00:00
|
|
|
to_append_or_none = none();
|
2013-11-26 09:39:16 +00:00
|
|
|
input_position += 1; /* Skip over the backslash */
|
2013-11-25 06:57:49 +00:00
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
default: {
|
|
|
|
/* Literal backslash that doesn't escape anything! Leave things alone;
|
|
|
|
* we'll append the backslash itself */
|
2013-11-25 06:57:49 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
case '$': {
|
|
|
|
if (unescape_special) {
|
2014-05-14 05:30:41 +00:00
|
|
|
to_append_or_none = VARIABLE_EXPAND_SINGLE;
|
2019-05-18 18:31:41 +00:00
|
|
|
vars_or_seps.push_back(input_position);
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2019-05-05 10:09:25 +00:00
|
|
|
default: {
|
|
|
|
break;
|
|
|
|
}
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
// Now maybe append the char.
|
2019-09-14 20:17:22 +00:00
|
|
|
if (to_append_or_none.has_value()) {
|
|
|
|
result.push_back(*to_append_or_none);
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
// Return the string by reference, and then success.
|
|
|
|
if (!errored) {
|
2017-01-27 00:14:50 +00:00
|
|
|
*output_str = std::move(result);
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
return !errored;
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Unescape the string \p str in place, using the script-style unescaper.
/// Returns true on success, in which case \p str holds the unescaped text. On failure \p str is
/// left untouched and false is returned.
bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special) {
    assert(str != nullptr);
    // Unescape into a scratch string so a failed parse cannot clobber the caller's data.
    wcstring unescaped;
    if (!unescape_string_internal(str->c_str(), str->size(), &unescaped, escape_special)) {
        return false;
    }
    *str = std::move(unescaped);
    return true;
}
|
|
|
|
|
2021-11-27 20:46:15 +00:00
|
|
|
bool unescape_string(const wchar_t *input, size_t len, wcstring *output,
|
|
|
|
unescape_flags_t escape_special, escape_string_style_t style) {
|
2017-07-04 21:41:45 +00:00
|
|
|
bool success = false;
|
2017-06-23 03:47:54 +00:00
|
|
|
switch (style) {
|
|
|
|
case STRING_STYLE_SCRIPT: {
|
2021-11-27 20:46:15 +00:00
|
|
|
success = unescape_string_internal(input, len, output, escape_special);
|
2017-06-23 03:47:54 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case STRING_STYLE_URL: {
|
|
|
|
success = unescape_string_url(input, output);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case STRING_STYLE_VAR: {
|
|
|
|
success = unescape_string_var(input, output);
|
|
|
|
break;
|
|
|
|
}
|
2018-11-17 02:21:05 +00:00
|
|
|
case STRING_STYLE_REGEX: {
|
2018-11-15 05:30:11 +00:00
|
|
|
// unescaping PCRE2 is not needed/supported, the PCRE2 engine is responsible for that
|
|
|
|
success = false;
|
|
|
|
break;
|
|
|
|
}
|
2017-06-23 03:47:54 +00:00
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
if (!success) output->clear();
|
2013-11-25 06:57:49 +00:00
|
|
|
return success;
|
|
|
|
}
|
|
|
|
|
2021-11-27 20:46:15 +00:00
|
|
|
bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special,
|
|
|
|
escape_string_style_t style) {
|
|
|
|
return unescape_string(input, std::wcslen(input), output, escape_special, style);
|
|
|
|
}
|
|
|
|
|
2017-06-23 03:47:54 +00:00
|
|
|
bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special,
|
|
|
|
escape_string_style_t style) {
|
2021-11-27 20:46:15 +00:00
|
|
|
return unescape_string(input.c_str(), input.size(), output, escape_special, style);
|
2013-11-25 06:57:49 +00:00
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Format a byte count \p sz as a short human-readable string.
/// Negative sizes yield "unknown", zero yields the translated "empty", sizes below 1024 are
/// printed as "<n>B". Larger sizes are scaled by powers of 1024 and printed with one decimal
/// when the scaled value is below 10, otherwise as a whole number, followed by the unit.
wcstring format_size(long long sz) {
    if (sz < 0) return L"unknown";
    if (sz < 1) return _(L"empty");
    if (sz < 1024) return format_string(L"%lldB", sz);

    const wchar_t *const units[] = {L"kB", L"MB", L"GB", L"TB", L"PB", L"EB", L"ZB", L"YB"};
    const size_t last_unit = sizeof units / sizeof units[0] - 1;

    // Scale down until the value is below 1024 of the current unit, or we run out of units.
    size_t unit = 0;
    while (sz >= (1024 * 1024) && unit < last_unit) {
        sz /= 1024;
        unit++;
    }

    // sz is still expressed in the next-smaller unit here; dividing by 1024 gives the final value.
    const long whole = (static_cast<long>(sz)) / 1024;
    if (whole > 9) {
        return format_string(L"%ld%ls", whole, units[unit]);
    }
    return format_string(L"%.1f%ls", static_cast<double>(sz) / 1024, units[unit]);
}
|
2009-02-02 22:46:45 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Crappy function to extract the most significant digit of an unsigned long long value.
/// Returns that digit as an ASCII character and subtracts its contribution (digit times its
/// place value) from \p *xp. Note that this discards how many zero digits follow the leading one.
static char extract_most_significant_digit(unsigned long long *xp) {
    unsigned long long digit = *xp;
    unsigned long long scale = 1;
    // Strip trailing digits one at a time, tracking the place value of what remains.
    for (; digit >= 10; digit /= 10) {
        scale *= 10;
    }
    *xp -= digit * scale;
    return static_cast<char>(digit + '0');
}
|
|
|
|
|
2020-09-08 20:04:44 +00:00
|
|
|
/// Append the decimal digits of \p val to \p buff, starting at index \p *inout_idx and never
/// writing at or beyond index \p max_len. Digits are written most-significant first, so if space
/// runs out the low-order digits are the ones dropped. \p *inout_idx is advanced past the
/// characters written. No terminator is written.
///
/// Every digit is emitted, including interior and trailing zeros (105 -> "105", 100 -> "100"),
/// and a value of 0 is written as "0". This uses only integer arithmetic: no allocation and no
/// library calls.
static void append_ull(char *buff, unsigned long long val, size_t *inout_idx, size_t max_len) {
    // Find the place value of the leading digit. We only multiply while place * 10 <= val, so
    // this cannot overflow.
    unsigned long long place = 1;
    while (val / place >= 10) {
        place *= 10;
    }

    size_t idx = *inout_idx;
    for (; place > 0 && idx < max_len; place /= 10) {
        buff[idx++] = static_cast<char>('0' + (val / place) % 10);
    }
    *inout_idx = idx;
}
|
|
|
|
|
2020-09-08 20:04:44 +00:00
|
|
|
/// Copy the characters of the nul-terminated \p str into \p buff, starting at index
/// \p *inout_idx and stopping before index \p max_len. \p *inout_idx is advanced past the
/// characters written. No terminator is written.
static void append_str(char *buff, const char *str, size_t *inout_idx, size_t max_len) {
    size_t pos = *inout_idx;
    for (const char *src = str; *src && pos < max_len; ++src, ++pos) {
        buff[pos] = *src;
    }
    *inout_idx = pos;
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
void format_size_safe(char buff[128], unsigned long long sz) {
|
2012-02-29 19:27:14 +00:00
|
|
|
const size_t buff_size = 128;
|
2016-04-27 23:10:14 +00:00
|
|
|
const size_t max_len = buff_size - 1; // need to leave room for a null terminator
|
2019-03-12 22:07:07 +00:00
|
|
|
std::memset(buff, 0, buff_size);
|
2012-02-29 19:27:14 +00:00
|
|
|
size_t idx = 0;
|
2019-11-19 02:34:50 +00:00
|
|
|
const char *const sz_name[] = {"kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", nullptr};
|
2016-04-27 23:10:14 +00:00
|
|
|
if (sz < 1) {
|
2022-08-27 15:45:52 +00:00
|
|
|
strcpy(buff, "empty");
|
2016-04-27 23:10:14 +00:00
|
|
|
} else if (sz < 1024) {
|
2012-02-29 19:27:14 +00:00
|
|
|
append_ull(buff, sz, &idx, max_len);
|
|
|
|
append_str(buff, "B", &idx, max_len);
|
2016-04-27 23:10:14 +00:00
|
|
|
} else {
|
|
|
|
for (size_t i = 0; sz_name[i]; i++) {
|
|
|
|
if (sz < (1024 * 1024) || !sz_name[i + 1]) {
|
|
|
|
unsigned long long isz = sz / 1024;
|
|
|
|
if (isz > 9) {
|
2012-02-29 19:27:14 +00:00
|
|
|
append_ull(buff, isz, &idx, max_len);
|
2016-04-27 23:10:14 +00:00
|
|
|
} else {
|
2018-02-08 22:54:27 +00:00
|
|
|
append_ull(buff, isz, &idx, max_len);
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
// Maybe append a single fraction digit.
|
2012-02-29 19:27:14 +00:00
|
|
|
unsigned long long remainder = sz % 1024;
|
2016-04-27 23:10:14 +00:00
|
|
|
if (remainder > 0) {
|
2012-02-29 19:27:14 +00:00
|
|
|
char tmp[3] = {'.', extract_most_significant_digit(&remainder), 0};
|
|
|
|
append_str(buff, tmp, &idx, max_len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
append_str(buff, sz_name[i], &idx, max_len);
|
2012-11-19 00:30:30 +00:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
sz /= 1024;
|
|
|
|
}
|
2012-02-29 19:27:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
double timef() {
|
2012-11-19 00:30:30 +00:00
|
|
|
struct timeval tv;
|
2019-11-19 02:34:50 +00:00
|
|
|
assert_with_errno(gettimeofday(&tv, nullptr) != -1);
|
2021-08-27 23:20:30 +00:00
|
|
|
return static_cast<timepoint_t>(tv.tv_sec) + 1e-6 * tv.tv_usec;
|
2012-11-19 00:30:30 +00:00
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
/// Terminate the process immediately with status \p code via _exit().
void exit_without_destructors(int code) {
    _exit(code);
}
|
2012-02-28 23:11:46 +00:00
|
|
|
|
2011-12-27 03:18:46 +00:00
|
|
|
extern "C" {
|
2019-02-10 12:20:01 +00:00
|
|
|
[[gnu::noinline]] void debug_thread_error(void) {
|
2019-05-22 01:39:01 +00:00
|
|
|
// Wait for a SIGINT. We can't use sigsuspend() because the signal may be delivered on another
|
|
|
|
// thread.
|
2020-06-20 16:23:36 +00:00
|
|
|
sigchecker_t sigint(topic_t::sighupint);
|
2019-05-26 02:19:03 +00:00
|
|
|
sigint.wait();
|
2011-12-27 03:18:46 +00:00
|
|
|
}
|
2012-01-05 21:58:48 +00:00
|
|
|
}
|
|
|
|
|
2018-12-31 01:25:50 +00:00
|
|
|
void set_main_thread() {
|
2019-05-29 19:33:44 +00:00
|
|
|
// Just call thread_id() once to force increment of thread_id.
|
|
|
|
uint64_t tid = thread_id();
|
|
|
|
assert(tid == 1 && "main thread should have thread ID 1");
|
|
|
|
(void)tid;
|
2018-12-31 01:25:50 +00:00
|
|
|
}
|
2016-04-27 23:10:14 +00:00
|
|
|
|
2018-02-19 02:33:04 +00:00
|
|
|
/// Set the testing flag that makes assert_is_main_thread() and assert_is_background_thread()
/// skip their checks.
void configure_thread_assertions_for_testing() {
    thread_asserts_cfg_for_testing = true;
}
|
2012-05-14 03:19:02 +00:00
|
|
|
|
2019-05-05 10:09:25 +00:00
|
|
|
/// Return the current value of the is_forked_proc flag, which the pthread_atfork child handler
/// installed by setup_fork_guards() sets to true.
bool is_forked_child() {
    return is_forked_proc;
}
|
2012-02-28 02:43:24 +00:00
|
|
|
|
2018-02-19 02:33:04 +00:00
|
|
|
void setup_fork_guards() {
|
2018-12-31 02:21:36 +00:00
|
|
|
is_forked_proc = false;
|
2019-04-28 22:56:49 +00:00
|
|
|
static std::once_flag fork_guard_flag;
|
|
|
|
std::call_once(fork_guard_flag,
|
|
|
|
[] { pthread_atfork(nullptr, nullptr, [] { is_forked_proc = true; }); });
|
2012-02-28 02:43:24 +00:00
|
|
|
}
|
|
|
|
|
2022-06-19 23:27:06 +00:00
|
|
|
/// Record the process group that currently owns the terminal on stdin, so that
/// restore_term_foreground_process_group_for_exit() can hand the terminal back to it.
void save_term_foreground_process_group() {
    initial_fg_process_group = tcgetpgrp(STDIN_FILENO);
}
|
2012-11-18 10:16:14 +00:00
|
|
|
|
2020-05-31 21:11:39 +00:00
|
|
|
void restore_term_foreground_process_group_for_exit() {
|
2021-04-05 20:04:05 +00:00
|
|
|
// We wish to restore the tty to the initial owner. There's two ways this can go wrong:
|
|
|
|
// 1. We may steal the tty from someone else (#7060).
|
|
|
|
// 2. The call to tcsetpgrp may deliver SIGSTOP to us, and we will not exit.
|
|
|
|
// Hanging on exit seems worse, so ensure that SIGTTOU is ignored so we do not get SIGSTOP.
|
|
|
|
// Note initial_fg_process_group == 0 is possible with Linux pid namespaces.
|
|
|
|
// This is called during shutdown and from a signal handler. We don't bother to complain on
|
|
|
|
// failure because doing so is unlikely to be noticed.
|
|
|
|
if (initial_fg_process_group > 0 && initial_fg_process_group != getpgrp()) {
|
|
|
|
(void)signal(SIGTTOU, SIG_IGN);
|
2020-05-31 20:51:47 +00:00
|
|
|
(void)tcsetpgrp(STDIN_FILENO, initial_fg_process_group);
|
2012-11-18 10:16:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-29 19:33:44 +00:00
|
|
|
/// Return true if the calling thread's thread_id() is 1, the ID that set_main_thread() asserts
/// for the main thread.
bool is_main_thread() {
    return thread_id() == 1;
}
|
2012-01-05 21:58:48 +00:00
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
void assert_is_main_thread(const char *who) {
|
2021-09-29 06:31:47 +00:00
|
|
|
if (!likely(is_main_thread()) && !unlikely(thread_asserts_cfg_for_testing)) {
|
2019-05-30 11:04:40 +00:00
|
|
|
FLOGF(error, L"%s called off of main thread.", who);
|
|
|
|
FLOGF(error, L"Break on debug_thread_error to debug.");
|
2011-12-27 03:18:46 +00:00
|
|
|
debug_thread_error();
|
2012-02-28 02:43:24 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
void assert_is_not_forked_child(const char *who) {
|
2021-09-29 06:31:47 +00:00
|
|
|
if (unlikely(is_forked_child())) {
|
2019-05-30 11:04:40 +00:00
|
|
|
FLOGF(error, L"%s called in a forked child.", who);
|
|
|
|
FLOG(error, L"Break on debug_thread_error to debug.");
|
2012-02-28 02:43:24 +00:00
|
|
|
debug_thread_error();
|
2011-12-27 03:18:46 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-04-27 23:10:14 +00:00
|
|
|
void assert_is_background_thread(const char *who) {
|
2021-09-29 06:31:47 +00:00
|
|
|
if (unlikely(is_main_thread()) && !unlikely(thread_asserts_cfg_for_testing)) {
|
2019-05-30 11:04:40 +00:00
|
|
|
FLOGF(error, L"%s called on the main thread (may block!).", who);
|
|
|
|
FLOG(error, L"Break on debug_thread_error to debug.");
|
2011-12-27 03:18:46 +00:00
|
|
|
debug_thread_error();
|
2012-02-24 20:13:35 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-06 21:58:35 +00:00
|
|
|
void assert_is_locked(std::mutex &mutex, const char *who, const char *caller) {
|
2018-12-31 02:15:49 +00:00
|
|
|
// Note that std::mutex.try_lock() is allowed to return false when the mutex isn't
|
|
|
|
// actually locked; fortunately we are checking the opposite so we're safe.
|
2021-09-29 06:31:47 +00:00
|
|
|
if (unlikely(mutex.try_lock())) {
|
2019-05-30 11:04:40 +00:00
|
|
|
FLOGF(error, L"%s is not locked when it should be in '%s'", who, caller);
|
|
|
|
FLOG(error, L"Break on debug_thread_error to debug.");
|
2012-02-24 20:13:35 +00:00
|
|
|
debug_thread_error();
|
2021-02-06 21:58:35 +00:00
|
|
|
mutex.unlock();
|
2014-08-24 07:59:03 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-29 03:07:36 +00:00
|
|
|
/// Test if the specified character is in a range that fish uses internally to store special tokens.
|
2016-12-15 03:21:36 +00:00
|
|
|
///
|
|
|
|
/// NOTE: This is used when tokenizing the input. It is also used when reading input, before
|
|
|
|
/// tokenization, to replace such chars with REPLACEMENT_WCHAR if they're not part of a quoted
|
|
|
|
/// string. We don't want external input to be able to feed reserved characters into our
|
|
|
|
/// lexer/parser or code evaluator.
|
2016-10-17 23:23:29 +00:00
|
|
|
//
|
|
|
|
// TODO: Actually implement the replacement as documented above.
|
|
|
|
bool fish_reserved_codepoint(wchar_t c) {
|
|
|
|
return (c >= RESERVED_CHAR_BASE && c < RESERVED_CHAR_END) ||
|
2019-03-24 01:07:32 +00:00
|
|
|
(c >= ENCODE_DIRECT_BASE && c < ENCODE_DIRECT_END);
|
2016-10-17 23:23:29 +00:00
|
|
|
}
|
2016-12-15 03:21:36 +00:00
|
|
|
|
2017-01-11 05:52:10 +00:00
|
|
|
/// Reopen stdin, stdout and/or stderr on /dev/null. This is invoked when we find that our tty has
|
|
|
|
/// become invalid.
|
2016-12-15 03:21:36 +00:00
|
|
|
void redirect_tty_output() {
|
|
|
|
struct termios t;
|
|
|
|
int fd = open("/dev/null", O_WRONLY);
|
2018-02-08 23:05:13 +00:00
|
|
|
if (fd == -1) {
|
|
|
|
__fish_assert("Could not open /dev/null!", __FILE__, __LINE__, errno);
|
|
|
|
}
|
2017-01-11 05:52:10 +00:00
|
|
|
if (tcgetattr(STDIN_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDIN_FILENO);
|
|
|
|
if (tcgetattr(STDOUT_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDOUT_FILENO);
|
|
|
|
if (tcgetattr(STDERR_FILENO, &t) == -1 && errno == EIO) dup2(fd, STDERR_FILENO);
|
2016-12-15 03:21:36 +00:00
|
|
|
close(fd);
|
|
|
|
}
|
2017-02-14 04:37:27 +00:00
|
|
|
|
|
|
|
/// Display a failed assertion message, dump a stack trace if possible, then die.
|
2017-06-17 04:50:08 +00:00
|
|
|
[[noreturn]] void __fish_assert(const char *msg, const char *file, size_t line, int error) {
|
2021-09-29 06:31:47 +00:00
|
|
|
if (unlikely(error)) {
|
2019-05-30 09:54:09 +00:00
|
|
|
FLOGF(error, L"%s:%zu: failed assertion: %s: errno %d (%s)", file, line, msg, error,
|
2019-06-04 03:30:48 +00:00
|
|
|
std::strerror(error));
|
2017-02-15 05:09:15 +00:00
|
|
|
} else {
|
2019-05-30 09:54:09 +00:00
|
|
|
FLOGF(error, L"%s:%zu: failed assertion: %s", file, line, msg);
|
2017-02-15 05:09:15 +00:00
|
|
|
}
|
2022-06-16 08:26:43 +00:00
|
|
|
show_stackframe(99, 1);
|
2017-02-14 04:37:27 +00:00
|
|
|
abort();
|
|
|
|
}
|
2017-04-20 06:43:02 +00:00
|
|
|
|
|
|
|
/// Test if the given char is valid in a variable name.
|
|
|
|
bool valid_var_name_char(wchar_t chr) { return fish_iswalnum(chr) || chr == L'_'; }
|
|
|
|
|
|
|
|
/// Test if the given string is a valid variable name.
|
2019-03-14 17:28:48 +00:00
|
|
|
bool valid_var_name(const wcstring &str) {
|
2021-02-14 02:47:50 +00:00
|
|
|
// Note do not use c_str(), we want to fail on embedded nul bytes.
|
2021-03-14 19:03:56 +00:00
|
|
|
return !str.empty() && std::all_of(str.begin(), str.end(), valid_var_name_char);
|
2021-02-14 02:47:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool valid_var_name(const wchar_t *str) {
|
2021-03-14 19:03:56 +00:00
|
|
|
if (str[0] == L'\0') return false;
|
2021-02-14 02:47:50 +00:00
|
|
|
for (size_t i = 0; str[i] != L'\0'; i++) {
|
|
|
|
if (!valid_var_name_char(str[i])) return false;
|
|
|
|
}
|
|
|
|
return true;
|
2017-04-20 06:43:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Test if the string is a valid function name.
|
|
|
|
bool valid_func_name(const wcstring &str) {
|
2019-03-14 17:28:48 +00:00
|
|
|
if (str.empty()) return false;
|
2017-04-20 06:43:02 +00:00
|
|
|
if (str.at(0) == L'-') return false;
|
2021-08-01 10:23:31 +00:00
|
|
|
// A function name needs to be a valid path, so no / and no NULL.
|
2017-04-20 06:43:02 +00:00
|
|
|
if (str.find_first_of(L'/') != wcstring::npos) return false;
|
2021-08-01 10:23:31 +00:00
|
|
|
if (str.find_first_of(L'\0') != wcstring::npos) return false;
|
2017-04-20 06:43:02 +00:00
|
|
|
return true;
|
|
|
|
}
|
2018-10-10 03:33:20 +00:00
|
|
|
|
|
|
|
/// Return the path to the current executable. This needs to be realpath'd.
|
|
|
|
std::string get_executable_path(const char *argv0) {
|
|
|
|
char buff[PATH_MAX];
|
|
|
|
|
2020-04-05 02:15:08 +00:00
|
|
|
#ifdef __APPLE__
|
2018-10-10 03:33:20 +00:00
|
|
|
// On OS X use it's proprietary API to get the path to the executable.
|
|
|
|
// This is basically grabbing exec_path after argc, argv, envp, ...: for us
|
|
|
|
// https://opensource.apple.com/source/adv_cmds/adv_cmds-163/ps/print.c
|
|
|
|
uint32_t buffSize = sizeof buff;
|
|
|
|
if (_NSGetExecutablePath(buff, &buffSize) == 0) return std::string(buff);
|
2022-07-24 07:26:33 +00:00
|
|
|
#elif defined(__BSD__) && defined(KERN_PROC_PATHNAME)
|
2020-11-20 20:11:03 +00:00
|
|
|
// BSDs do not have /proc by default, (although it can be mounted as procfs via the Linux
|
|
|
|
// compatibility layer). We can use sysctl instead: per sysctl(3), passing in a process ID of -1
|
|
|
|
// returns the value for the current process.
|
2018-10-10 03:33:20 +00:00
|
|
|
size_t buff_size = sizeof buff;
|
2022-07-25 13:57:01 +00:00
|
|
|
#if defined(__NetBSD__)
|
2022-07-24 07:26:33 +00:00
|
|
|
int name[] = {CTL_KERN, KERN_PROC_ARGS, getpid(), KERN_PROC_PATHNAME};
|
2022-07-25 13:57:01 +00:00
|
|
|
#else
|
2019-05-05 10:09:25 +00:00
|
|
|
int name[] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
|
2022-07-25 13:57:01 +00:00
|
|
|
#endif
|
2018-10-10 03:33:20 +00:00
|
|
|
int result = sysctl(name, sizeof(name) / sizeof(int), buff, &buff_size, nullptr, 0);
|
|
|
|
if (result != 0) {
|
|
|
|
wperror(L"sysctl KERN_PROC_PATHNAME");
|
2019-05-05 10:09:25 +00:00
|
|
|
} else {
|
2018-10-10 03:33:20 +00:00
|
|
|
return std::string(buff);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
// On other unixes, fall back to the Linux-ish /proc/ directory
|
|
|
|
ssize_t len;
|
|
|
|
len = readlink("/proc/self/exe", buff, sizeof buff - 1); // Linux
|
|
|
|
if (len == -1) {
|
|
|
|
len = readlink("/proc/curproc/file", buff, sizeof buff - 1); // other BSDs
|
|
|
|
if (len == -1) {
|
|
|
|
len = readlink("/proc/self/path/a.out", buff, sizeof buff - 1); // Solaris
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (len > 0) {
|
|
|
|
buff[len] = '\0';
|
2022-06-16 14:36:05 +00:00
|
|
|
// When /proc/self/exe points to a file that was deleted (or overwritten on update!)
|
|
|
|
// then linux adds a " (deleted)" suffix.
|
|
|
|
// If that's not a valid path, let's remove that awkward suffix.
|
|
|
|
std::string buffstr{buff};
|
|
|
|
if (access(buff, F_OK)) {
|
|
|
|
auto dellen = const_strlen(" (deleted)");
|
2022-06-16 16:43:28 +00:00
|
|
|
if (buffstr.size() > dellen &&
|
|
|
|
buffstr.compare(buffstr.size() - dellen, dellen, " (deleted)") == 0) {
|
2022-06-16 16:43:57 +00:00
|
|
|
buffstr = buffstr.substr(0, buffstr.size() - dellen);
|
2022-06-16 14:36:05 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return buffstr;
|
2018-10-10 03:33:20 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Just return argv0, which probably won't work (i.e. it's not an absolute path or a path
|
|
|
|
// relative to the working directory, but instead something the caller found via $PATH). We'll
|
|
|
|
// eventually fall back to the compile time paths.
|
|
|
|
return std::string(argv0 ? argv0 : "");
|
|
|
|
}
|
|
|
|
|
2019-02-06 04:36:38 +00:00
|
|
|
/// Return a path to a directory where we can store temporary files.
/// $TMPDIR wins if it is set. Otherwise the result is chosen at compile time: the Darwin per-user
/// temp dir from confstr() (or "/tmp" if that fails), else P_tmpdir, else _PATH_TMP, else "/tmp".
std::string get_path_to_tmp_dir() {
    if (const char *env_tmpdir = getenv("TMPDIR")) {
        return env_tmpdir;
    }
#if defined(_CS_DARWIN_USER_TEMP_DIR)
    char osx_tmpdir[PATH_MAX];
    const size_t n = confstr(_CS_DARWIN_USER_TEMP_DIR, osx_tmpdir, PATH_MAX);
    // confstr() returns 0 on error and a value larger than the buffer if the result was truncated.
    const bool got_full_path = 0 < n && n <= PATH_MAX;
    return std::string(got_full_path ? osx_tmpdir : "/tmp");
#elif defined(P_tmpdir)
    return P_tmpdir;
#elif defined(_PATH_TMP)
    return _PATH_TMP;
#else
    return "/tmp";
#endif
}
|
2019-01-30 01:59:41 +00:00
|
|
|
|
|
|
|
// This function attempts to distinguish between a console session (at the actual login vty) and a
|
|
|
|
// session within a terminal emulator inside a desktop environment or over SSH. Unfortunately
|
|
|
|
// there are few values of $TERM that we can interpret as being exclusively console sessions, and
|
|
|
|
// most common operating systems do not use them. The value is cached for the duration of the fish
|
|
|
|
// session. We err on the side of assuming it's not a console session. This approach isn't
|
|
|
|
// bullet-proof and that's OK.
|
|
|
|
bool is_console_session() {
|
2020-10-10 10:50:07 +00:00
|
|
|
static const bool console_session = [] {
|
2022-06-19 23:00:05 +00:00
|
|
|
char tty_name[PATH_MAX];
|
|
|
|
if (ttyname_r(STDIN_FILENO, tty_name, sizeof tty_name) != 0) {
|
|
|
|
return false;
|
|
|
|
}
|
2021-02-08 21:09:10 +00:00
|
|
|
constexpr auto len = const_strlen("/dev/tty");
|
2019-01-30 01:59:41 +00:00
|
|
|
const char *TERM = getenv("TERM");
|
|
|
|
return
|
|
|
|
// Test that the tty matches /dev/(console|dcons|tty[uv\d])
|
2019-05-05 10:09:25 +00:00
|
|
|
((strncmp(tty_name, "/dev/tty", len) == 0 &&
|
|
|
|
(tty_name[len] == 'u' || tty_name[len] == 'v' || isdigit(tty_name[len]))) ||
|
|
|
|
strcmp(tty_name, "/dev/dcons") == 0 || strcmp(tty_name, "/dev/console") == 0)
|
2019-01-30 01:59:41 +00:00
|
|
|
// and that $TERM is simple, e.g. `xterm` or `vt100`, not `xterm-something`
|
|
|
|
&& (!TERM || !strchr(TERM, '-') || !strcmp(TERM, "sun-color"));
|
|
|
|
}();
|
|
|
|
return console_session;
|
|
|
|
}
|