// Prototypes for various functions, mostly string utilities, that are used by most parts of fish. #ifndef FISH_COMMON_H #define FISH_COMMON_H #include "config.h" // IWYU pragma: keep #include #include // Needed for va_list et al. #include // IWYU pragma: keep #ifdef HAVE_SYS_IOCTL_H #include // IWYU pragma: keep #endif #include #include #include #include #include #include #include #include #include "fallback.h" // IWYU pragma: keep #include "maybe.h" // Create a generic define for all BSD platforms #if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) #define __BSD__ #endif // PATH_MAX may not exist. #ifndef PATH_MAX #define PATH_MAX 4096 #endif // Define a symbol we can use elsewhere in our code to determine if we're being built on MS Windows // under Cygwin. #if defined(_WIN32) || defined(_WIN64) || defined(WIN32) || defined(__CYGWIN__) || \ defined(__WIN32__) #define OS_IS_CYGWIN #endif // Check if Thread Sanitizer is enabled. #if defined(__has_feature) #if __has_feature(thread_sanitizer) #define FISH_TSAN_WORKAROUNDS 1 #endif #endif #ifdef __SANITIZE_THREAD__ #define FISH_TSAN_WORKAROUNDS 1 #endif // Common string type. typedef std::wstring wcstring; typedef std::vector wcstring_list_t; struct termsize_t; // Highest legal ASCII value. #define ASCII_MAX 127u // Highest legal 16-bit Unicode value. #define UCS2_MAX 0xFFFFu // Highest legal byte value. #define BYTE_MAX 0xFFu // Unicode BOM value. #define UTF8_BOM_WCHAR 0xFEFFu // Use Unicode "noncharacters" for internal characters as much as we can. This // gives us 32 "characters" for internal use that we can guarantee should not // appear in our input stream. See http://www.unicode.org/faq/private_use.html. #define RESERVED_CHAR_BASE static_cast(0xFDD0) #define RESERVED_CHAR_END static_cast(0xFDF0) // Split the available noncharacter values into two ranges to ensure there are // no conflicts among the places we use these special characters. 
#define EXPAND_RESERVED_BASE RESERVED_CHAR_BASE
#define EXPAND_RESERVED_END (EXPAND_RESERVED_BASE + 16)
#define WILDCARD_RESERVED_BASE EXPAND_RESERVED_END
#define WILDCARD_RESERVED_END (WILDCARD_RESERVED_BASE + 16)
// Make sure the ranges defined above don't exceed the range for noncharacters.
// This is to make sure we didn't do something stupid in subdividing the
// Unicode range for our needs.
//#if WILDCARD_RESERVED_END > RESERVED_CHAR_END
//#error
//#endif

// These are in the Unicode private-use range. We really shouldn't use this
// range but have little choice in the matter given how our lexer/parser works.
// We can't use non-characters for these two ranges because there are only 66 of
// them and we need at least 256 + 64.
//
// If sizeof(wchar_t) == 4 we could avoid using private-use chars; however, that
// would result in fish having different behavior on machines with 16 versus 32
// bit wchar_t. It's better that fish behave the same on both types of systems.
//
// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know
// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF)
// on Mac OS X. See http://www.unicode.org/faq/private_use.html.
#define ENCODE_DIRECT_BASE static_cast<wchar_t>(0xF600)
#define ENCODE_DIRECT_END (ENCODE_DIRECT_BASE + 256)

// NAME_MAX is not defined on Solaris
#if !defined(NAME_MAX)
#include <sys/param.h>
#if defined(MAXNAMELEN)
// MAXNAMELEN is defined on Linux, BSD, and Solaris among others
#define NAME_MAX MAXNAMELEN
#else
static_assert(false, "Neither NAME_MAX nor MAXNAMELEN is defined!");
#endif
#endif

// PATH_MAX may not exist.
#ifndef PATH_MAX
#ifdef MAXPATHLEN
#define PATH_MAX MAXPATHLEN
#else
/// Fallback length of MAXPATHLEN. Hopefully a sane value.
#define PATH_MAX 4096
#endif
#endif

/// The escaping styles understood by the escape/unescape functions.
enum escape_string_style_t {
    STRING_STYLE_SCRIPT,
    STRING_STYLE_URL,
    STRING_STYLE_VAR,
    STRING_STYLE_REGEX,
};

// Flags for unescape_string functions.
enum {
    UNESCAPE_DEFAULT = 0,              // default behavior
    UNESCAPE_SPECIAL = 1 << 0,         // escape special fish syntax characters like the semicolon
    UNESCAPE_INCOMPLETE = 1 << 1,      // allow incomplete escape sequences
    UNESCAPE_NO_BACKSLASHES = 1 << 2,  // don't handle backslash escapes
};
// Bitwise OR of the UNESCAPE_* values above.
typedef unsigned int unescape_flags_t;

// Flags for the escape_string() functions. These are only applicable when the escape style is
// "script" (i.e., STRING_STYLE_SCRIPT).
enum {
    /// Escape all characters, including magic characters like the semicolon.
    ESCAPE_ALL = 1 << 0,
    /// Do not try to use 'simplified' quoted escapes, and do not use empty quotes as the empty
    /// string.
    ESCAPE_NO_QUOTED = 1 << 1,
    /// Do not escape tildes.
    ESCAPE_NO_TILDE = 1 << 2
};
// Bitwise OR of the ESCAPE_* values above.
typedef unsigned int escape_flags_t;

/// A user-visible job ID.
using job_id_t = int;

/// The non user-visible, never-recycled job ID.
/// Every job has a unique positive value for this.
using internal_job_id_t = uint64_t;

/// Exits without invoking destructors (via _exit), useful for code after fork.
[[noreturn]] void exit_without_destructors(int code);

/// Save the shell mode on startup so we can restore them on exit.
extern struct termios shell_modes;

/// The character to use where the text has been truncated. Is an ellipsis on unicode system and a $
/// on other systems.
wchar_t get_ellipsis_char();

/// The character or string to use where text has been truncated (ellipsis if possible, otherwise
/// ...)
const wchar_t *get_ellipsis_str();

/// Character representing an omitted newline at the end of text.
const wchar_t *get_omitted_newline_str();
int get_omitted_newline_width();

/// Character used for the silent mode of the read command
wchar_t get_obfuscation_read_char();

/// Profiling flag. True if commands should be profiled.
extern bool g_profiling_active;

/// Name of the current program. Should be set at startup. Used by the debug function.
extern const wchar_t *program_name; /// Set to false if it's been determined we can't trust the last modified timestamp on the tty. extern const bool has_working_tty_timestamps; /// A global, empty string. This is useful for functions which wish to return a reference to an /// empty string. extern const wcstring g_empty_string; // Pause for input, then exit the program. If supported, print a backtrace first. #define FATAL_EXIT() \ do { \ char exit_read_buff; \ show_stackframe(L'E'); \ ignore_result(read(0, &exit_read_buff, 1)); \ exit_without_destructors(1); \ } while (0) /// Exit the program at once after emitting an error message and stack trace if possible. /// We use our own private implementation of `assert()` for two reasons. First, some implementations /// are subtly broken. For example, using `printf()` which can cause problems when mixed with wide /// stdio functions and should be writing the message to stderr rather than stdout. Second, if /// possible it is useful to provide additional context such as a stack backtrace. #undef assert #define assert(e) (e) ? ((void)0) : __fish_assert(#e, __FILE__, __LINE__, 0) #define assert_with_errno(e) (e) ? ((void)0) : __fish_assert(#e, __FILE__, __LINE__, errno) #define DIE(msg) __fish_assert(msg, __FILE__, __LINE__, 0) #define DIE_WITH_ERRNO(msg) __fish_assert(msg, __FILE__, __LINE__, errno) /// This macro is meant to be used with functions that return zero on success otherwise return an /// errno value. Most notably the pthread family of functions which we never expect to fail. #define DIE_ON_FAILURE(e) \ do { \ int status = e; \ if (unlikely(status != 0)) { \ __fish_assert(#e, __FILE__, __LINE__, status); \ } \ } while (0) [[noreturn]] void __fish_assert(const char *msg, const char *file, size_t line, int error); /// Shorthand for wgettext call in situations where a C-style string is needed (e.g., /// std::fwprintf()). 
#define _(wstr) wgettext(wstr).c_str()

/// Noop, used to tell xgettext that a string should be translated. Use this when a string cannot be
/// passed through wgettext() at the point where it is used. For example, when initializing a
/// static array or structure. You must pass the string through wgettext() when it is used.
/// See https://developer.gnome.org/glib/stable/glib-I18N.html#N-:CAPS
#define N_(wstr) wstr

/// An empty struct which may be embedded (or inherited from) to prevent copying.
/// Moving remains allowed.
struct [[gnu::unused]] noncopyable_t {
    noncopyable_t() = default;
    noncopyable_t(noncopyable_t &&) = default;
    noncopyable_t &operator=(noncopyable_t &&) = default;
    noncopyable_t(const noncopyable_t &) = delete;
    noncopyable_t &operator=(const noncopyable_t &) = delete;
};

/// An empty struct which may be embedded (or inherited from) to prevent moving.
struct [[gnu::unused]] nonmovable_t {
    nonmovable_t() = default;
    nonmovable_t(nonmovable_t &&) = delete;
    nonmovable_t &operator=(nonmovable_t &&) = delete;
};

/// Test if a collection contains a value.
/// \param col any range usable with std::begin/std::end.
/// \param val the value to search for, compared with operator==.
/// \return true if some element of \p col equals \p val.
template <typename Col, typename T2>
bool contains(const Col &col, const T2 &val) {
    return std::find(std::begin(col), std::end(col), val) != std::end(col);
}

/// Append a vector \p donator to the vector \p receiver.
/// The elements are moved out of \p donator.
template <typename T>
void vec_append(std::vector<T> &receiver, std::vector<T> &&donator) {
    if (receiver.empty()) {
        // Nothing to preserve: just steal the donator's storage.
        receiver = std::move(donator);
    } else {
        receiver.insert(receiver.end(), std::make_move_iterator(donator.begin()),
                        std::make_move_iterator(donator.end()));
    }
}

/// Move an object into a shared_ptr.
template <typename T>
std::shared_ptr<T> move_to_sharedptr(T &&v) {
    return std::make_shared<T>(std::move(v));
}

/// A function type to check for cancellation.
/// \return true if execution should cancel.
using cancel_checker_t = std::function<bool()>;

/// Print a stack trace to stderr.
void show_stackframe(const wchar_t msg_level, int frame_count = 100, int skip_levels = 0);

/// Returns a wide character string equivalent of the specified multibyte character string.
/// /// This function encodes illegal character sequences in a reversible way using the private use /// area. wcstring str2wcstring(const char *in); wcstring str2wcstring(const char *in, size_t len); wcstring str2wcstring(const std::string &in); wcstring str2wcstring(const std::string &in, size_t len); /// Returns a newly allocated multibyte character string equivalent of the specified wide character /// string. /// /// This function decodes illegal character sequences in a reversible way using the private use /// area. std::string wcs2string(const wcstring &input); std::string wcs2string(const wchar_t *in, size_t len); /// Like wcs2string, but appends to \p receiver instead of returning a new string. void wcs2string_appending(const wchar_t *in, size_t len, std::string *receiver); // Check if we are running in the test mode, where we should suppress error output #define TESTS_PROGRAM_NAME L"(ignore)" bool should_suppress_stderr_for_tests(); /// Branch prediction hints. Idea borrowed from Linux kernel. Just used for asserts. #define likely(x) __builtin_expect(!!(x), 1) #define unlikely(x) __builtin_expect(!!(x), 0) void assert_is_main_thread(const char *who); #define ASSERT_IS_MAIN_THREAD_TRAMPOLINE(x) assert_is_main_thread(x) #define ASSERT_IS_MAIN_THREAD() ASSERT_IS_MAIN_THREAD_TRAMPOLINE(__FUNCTION__) void assert_is_background_thread(const char *who); #define ASSERT_IS_BACKGROUND_THREAD_TRAMPOLINE(x) assert_is_background_thread(x) #define ASSERT_IS_BACKGROUND_THREAD() ASSERT_IS_BACKGROUND_THREAD_TRAMPOLINE(__FUNCTION__) /// Useful macro for asserting that a lock is locked. This doesn't check whether this thread locked /// it, which it would be nice if it did, but here it is anyways. void assert_is_locked(std::mutex &mutex, const char *who, const char *caller); #define ASSERT_IS_LOCKED(m) assert_is_locked(m, #m, __FUNCTION__) /// Format the specified size (in bytes, kilobytes, etc.) into the specified stringbuffer. 
wcstring format_size(long long sz); /// Version of format_size that does not allocate memory. void format_size_safe(char buff[128], unsigned long long sz); /// Writes out a long safely. void format_long_safe(char buff[64], long val); void format_long_safe(wchar_t buff[64], long val); void format_ullong_safe(wchar_t buff[64], unsigned long long val); /// "Narrows" a wide character string. This just grabs any ASCII characters and trunactes. void narrow_string_safe(char buff[64], const wchar_t *s); using scoped_lock = std::lock_guard; // An object wrapping a scoped lock and a value // This is returned from owning_lock.acquire() // Sample usage: // owning_lock locked_name; // acquired_lock name = name.acquire(); // name.value = "derp" // // Or for simple cases: // name.acquire().value = "derp" // template class acquired_lock : noncopyable_t { template friend class owning_lock; template friend class acquired_lock; acquired_lock(std::mutex &lk, Data *v) : lock(lk), value(v) {} acquired_lock(std::unique_lock &&lk, Data *v) : lock(std::move(lk)), value(v) {} std::unique_lock lock; Data *value; public: Data *operator->() { return value; } const Data *operator->() const { return value; } Data &operator*() { return *value; } const Data &operator*() const { return *value; } /// Implicit conversion to const version. operator acquired_lock() { // We're about to give up our lock, don't hold onto the data. const Data *cvalue = value; value = nullptr; return acquired_lock(std::move(lock), cvalue); } /// Create from a global lock. /// This is used in weird cases where a global lock protects more than one piece of data. static acquired_lock from_global(std::mutex &lk, Data *v) { return acquired_lock{lk, v}; } /// \return a reference to the lock, for use with a condition variable. 
std::unique_lock &get_lock() { return lock; } }; // A lock that owns a piece of data // Access to the data is only provided by taking the lock template class owning_lock { // No copying owning_lock &operator=(const scoped_lock &) = delete; owning_lock(const scoped_lock &) = delete; owning_lock(owning_lock &&) = default; owning_lock &operator=(owning_lock &&) = default; std::mutex lock; Data data; public: owning_lock(Data &&d) : data(std::move(d)) {} owning_lock(const Data &d) : data(d) {} owning_lock() : data() {} acquired_lock acquire() { return {lock, &data}; } }; /// A scoped manager to save the current value of some variable, and optionally set it to a new /// value. On destruction it restores the variable to its old value. /// /// This can be handy when there are multiple code paths to exit a block. template class scoped_push { T *const ref; T saved_value; bool restored; public: explicit scoped_push(T *r) : ref(r), saved_value(*r), restored(false) {} scoped_push(T *r, T new_value) : ref(r), restored(false) { saved_value = std::move(*ref); *ref = std::move(new_value); } ~scoped_push() { restore(); } void restore() { if (!restored) { *ref = std::move(saved_value); restored = true; } } }; wcstring format_string(const wchar_t *format, ...); wcstring vformat_string(const wchar_t *format, va_list va_orig); void append_format(wcstring &str, const wchar_t *format, ...); void append_formatv(wcstring &target, const wchar_t *format, va_list va_orig); #ifdef HAVE_STD__MAKE_UNIQUE using std::make_unique; #else /// make_unique implementation template std::unique_ptr make_unique(Args &&...args) { return std::unique_ptr(new T(std::forward(args)...)); } #endif /// This functions returns the end of the quoted substring beginning at \c pos. Returns 0 on error. /// /// \param pos the position of the opening quote. /// \param quote the quote to use, usually pointed to by \c pos. 
const wchar_t *quote_end(const wchar_t *pos, wchar_t quote); /// This function should be called after calling `setlocale()` to perform fish specific locale /// initialization. void fish_setlocale(); /// Call read, blocking and repeating on EINTR. Exits on EAGAIN. /// \return the number of bytes read, or 0 on EOF. On EAGAIN, returns -1 if nothing was read. long read_blocked(int fd, void *buf, size_t count); /// Loop a write request while failure is non-critical. Return -1 and set errno in case of critical /// error. ssize_t write_loop(int fd, const char *buff, size_t count); /// Loop a read request while failure is non-critical. Return -1 and set errno in case of critical /// error. ssize_t read_loop(int fd, void *buff, size_t count); /// Replace special characters with backslash escape sequences. Newline is replaced with \n, etc. /// /// \param in The string to be escaped /// \param flags Flags to control the escaping /// \return The escaped string wcstring escape_string(const wchar_t *in, escape_flags_t flags, escape_string_style_t style = STRING_STYLE_SCRIPT); wcstring escape_string(const wcstring &in, escape_flags_t flags, escape_string_style_t style = STRING_STYLE_SCRIPT); /// Escape a string so that it may be inserted into a double-quoted string. /// This permits ownership transfer. wcstring escape_string_for_double_quotes(wcstring in); /// \return a string representation suitable for debugging (not for presenting to the user). This /// replaces non-ASCII characters with either tokens like or <\xfdd7>. No other escapes /// are made (i.e. this is a lossy escape). wcstring debug_escape(const wcstring &in); /// Expand backslashed escapes and substitute them with their unescaped counterparts. Also /// optionally change the wildcards, the tilde character and a few more into constants which are /// defined in a private use area of Unicode. This assumes wchar_t is a unicode character set. 
/// Given a null terminated string starting with a backslash, read the escape as if it is unquoted, /// appending to result. Return the number of characters consumed, or none() on error. maybe_t read_unquoted_escape(const wchar_t *input, wcstring *result, bool allow_incomplete, bool unescape_special); /// Unescapes a string in-place. A true result indicates the string was unescaped, a false result /// indicates the string was unmodified. bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special); /// Reverse the effects of calling `escape_string`. Returns the unescaped value by reference. On /// failure, the output is set to an empty string. bool unescape_string(const wchar_t *input, wcstring *output, unescape_flags_t escape_special, escape_string_style_t style = STRING_STYLE_SCRIPT); bool unescape_string(const wcstring &input, wcstring *output, unescape_flags_t escape_special, escape_string_style_t style = STRING_STYLE_SCRIPT); /// Write the given paragraph of output, redoing linebreaks to fit \p termsize. wcstring reformat_for_screen(const wcstring &msg, const termsize_t &termsize); /// Print a short message about how to file a bug report to stderr. void bugreport(); /// Return the number of seconds from the UNIX epoch, with subsecond precision. This function uses /// the gettimeofday function and will have the same precision as that function. using timepoint_t = double; timepoint_t timef(); /// Call the following function early in main to set the main thread. This is our replacement for /// pthread_main_np(). void set_main_thread(); bool is_main_thread(); /// Configures thread assertions for testing. void configure_thread_assertions_for_testing(); /// Set up a guard to complain if we try to do certain things (like take a lock) after calling fork. void setup_fork_guards(void); /// Save the value of tcgetpgrp so we can restore it on exit. 
void save_term_foreground_process_group(); void restore_term_foreground_process_group_for_exit(); /// Return whether we are the child of a fork. bool is_forked_child(void); void assert_is_not_forked_child(const char *who); #define ASSERT_IS_NOT_FORKED_CHILD_TRAMPOLINE(x) assert_is_not_forked_child(x) #define ASSERT_IS_NOT_FORKED_CHILD() ASSERT_IS_NOT_FORKED_CHILD_TRAMPOLINE(__FUNCTION__) /// Determines if we are running under Microsoft's Windows Subsystem for Linux to work around /// some known limitations and/or bugs. /// See https://github.com/Microsoft/WSL/issues/423 and Microsoft/WSL#2997 bool is_windows_subsystem_for_linux(); /// Detect if we are running under Cygwin or Cgywin64 constexpr bool is_cygwin() { #ifdef __CYGWIN__ return true; #else return false; #endif } extern "C" { [[gnu::noinline]] void debug_thread_error(void); } /// Converts from wide char to digit in the specified base. If d is not a valid digit in the /// specified base, return -1. long convert_digit(wchar_t d, int base); /// This is a macro that can be used to silence "unused parameter" warnings from the compiler for /// functions which need to accept parameters they do not use because they need to be compatible /// with an interface. It's similar to the Python idiom of doing `_ = expr` at the top of a /// function in the same situation. #define UNUSED(expr) \ do { \ (void)(expr); \ } while (0) // Return true if the character is in a range reserved for fish's private use. bool fish_reserved_codepoint(wchar_t c); void redirect_tty_output(); std::string get_path_to_tmp_dir(); bool valid_var_name_char(wchar_t chr); bool valid_var_name(const wcstring &str); bool valid_var_name(const wchar_t *str); bool valid_func_name(const wcstring &str); // Return values (`$status` values for fish scripts) for various situations. enum { /// The status code used for normal exit in a command. STATUS_CMD_OK = 0, /// The status code used for failure exit in a command (but not if the args were invalid). 
STATUS_CMD_ERROR = 1, /// The status code used for invalid arguments given to a command. This is distinct from valid /// arguments that might result in a command failure. An invalid args condition is something /// like an unrecognized flag, missing or too many arguments, an invalid integer, etc. But STATUS_INVALID_ARGS = 2, /// The status code used when a command was not found. STATUS_CMD_UNKNOWN = 127, /// The status code used when an external command can not be run. STATUS_NOT_EXECUTABLE = 126, /// The status code used when a wildcard had no matches. STATUS_UNMATCHED_WILDCARD = 124, /// The status code used when illegal command name is encountered. STATUS_ILLEGAL_CMD = 123, /// The status code used when `read` is asked to consume too much data. STATUS_READ_TOO_MUCH = 122, /// The status code when an expansion fails, for example, "$foo[" STATUS_EXPAND_ERROR = 121, }; /* Normally casting an expression to void discards its value, but GCC versions 3.4 and newer have __attribute__ ((__warn_unused_result__)) which may cause unwanted diagnostics in that case. Use __typeof__ and __extension__ to work around the problem, if the workaround is known to be needed. */ #if 3 < __GNUC__ + (4 <= __GNUC_MINOR__) #define ignore_result(x) \ (__extension__({ \ __typeof__(x) __x = (x); \ (void)__x; \ })) #else #define ignore_result(x) ((void)(x)) #endif // Custom hash function used by unordered_map/unordered_set when key is const #ifndef CONST_WCSTRING_HASH #define CONST_WCSTRING_HASH 1 namespace std { template <> struct hash { std::size_t operator()(const wcstring &w) const { std::hash hasher; return hasher(w); } }; } // namespace std #endif /// Get the absolute path to the fish executable itself std::string get_executable_path(const char *argv0); /// A RAII wrapper for resources that don't recur, so we don't have to create a separate RAII /// wrapper for each function. Avoids needing to call "return cleanup()" or similar / everywhere. 
struct cleanup_t {
   private:
    // The action to run when this object is destroyed.
    const std::function<void()> cleanup;

   public:
    /// \param exit_actions the callable invoked from the destructor.
    cleanup_t(std::function<void()> exit_actions) : cleanup{std::move(exit_actions)} {}
    ~cleanup_t() { cleanup(); }
};

bool is_console_session();

/// Compile-time agnostic-size strcmp/wcscmp implementation. Unicode-unaware.
/// \return 0 if the strings are equal, 1 if \p lhs sorts after \p rhs, -1 otherwise.
template <typename T>
constexpr ssize_t const_strcmp(const T *lhs, const T *rhs) {
    return (*lhs == *rhs) ? (*lhs == 0 ? 0 : const_strcmp(lhs + 1, rhs + 1))
                          : (*lhs > *rhs ? 1 : -1);
}

/// Compile-time agnostic-size strlen/wcslen implementation. Unicode-unaware.
/// Scans the array from the back and returns the index of the first nul element, or N if the
/// array contains none. Callers pass only \p val; the other parameters carry the recursion state.
template <typename T, size_t N>
constexpr size_t const_strlen(const T (&val)[N], size_t last_checked_idx = N,
                              size_t first_nul_idx = N) {
    // Assume there's a nul char at the end (index N) but there may be one before that.
    return last_checked_idx == 0
               ? first_nul_idx
               : const_strlen(val, last_checked_idx - 1,
                              val[last_checked_idx - 1] ? first_nul_idx : last_checked_idx - 1);
}

/// \return true if the array \p vals is sorted by its name property.
/// Callers pass only \p vals; \p idx carries the recursion state.
template <typename T, size_t N>
constexpr bool is_sorted_by_name(const T (&vals)[N], size_t idx = 1) {
    return idx >= N ? true
                    : (const_strcmp(vals[idx - 1].name, vals[idx].name) <= 0 &&
                       is_sorted_by_name(vals, idx + 1));
}
#define ASSERT_SORTED_BY_NAME(x) static_assert(is_sorted_by_name(x), #x " not sorted by name")

/// \return a pointer to the first entry with the given name, assuming the entries are sorted by
/// name. \return nullptr if not found.
template const T *get_by_sorted_name(const wchar_t *name, const T (&vals)[N]) { assert(name && "Null name"); auto is_less = [](const T &v, const wchar_t *n) -> bool { return std::wcscmp(v.name, n) < 0; }; auto where = std::lower_bound(std::begin(vals), std::end(vals), name, is_less); if (where != std::end(vals) && std::wcscmp(where->name, name) == 0) { return &*where; } return nullptr; } template const T *get_by_sorted_name(const wcstring &name, const T (&vals)[N]) { return get_by_sorted_name(name.c_str(), vals); } /// As established in 1ab81ab90d1a408702e11f081fdaaafa30636c31, iswdigit() is very slow under glibc, /// and does nothing more than establish whether or not the single specified character is in the /// range ('0','9'). __attribute__((always_inline)) bool inline iswdigit(const wchar_t c) { return c >= L'0' && c <= L'9'; } #endif // FISH_COMMON_H