diff --git a/.cirrus.yml b/.cirrus.yml index 22590139e..dee0cbc93 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -51,7 +51,8 @@ linux_task: - ninja -j 6 fish fish_tests - ninja fish_run_tests - only_if: $CIRRUS_REPO_OWNER == 'fish-shell' + # CI task disabled during RIIR transition + only_if: false && $CIRRUS_REPO_OWNER == 'fish-shell' linux_arm_task: matrix: @@ -74,7 +75,8 @@ linux_arm_task: - file ./fish - ninja fish_run_tests - only_if: $CIRRUS_REPO_OWNER == 'fish-shell' + # CI task disabled during RIIR transition + only_if: false && $CIRRUS_REPO_OWNER == 'fish-shell' freebsd_task: matrix: @@ -89,6 +91,8 @@ freebsd_task: image: freebsd-12-3-release-amd64 tests_script: - pkg install -y cmake-core devel/pcre2 devel/ninja misc/py-pexpect git-lite + # libclang.so is a required build dependency for rust-c++ ffi bridge + - pkg install -y llvm # BSDs have the following behavior: root may open or access files even if # the mode bits would otherwise disallow it. For example root may open() # a file with write privileges even if the file has mode 400. This breaks @@ -99,8 +103,16 @@ freebsd_task: - mkdir build && cd build - chown -R fish-user .. - sudo -u fish-user -s whoami + # FreeBSD's pkg currently has rust 1.66.0 while we need rust 1.67.0+. Use rustup to install + # the latest, but note that it only installs rust per-user. + - sudo -u fish-user -s fetch -qo - https://sh.rustup.rs > rustup.sh + - sudo -u fish-user -s sh ./rustup.sh -y --profile=minimal + # `sudo -s ...` does not invoke a login shell so we need a workaround to make sure the + # rustup environment is configured for subsequent `sudo -s ...` commands. + # For some reason, this doesn't do the job: + # - sudo -u fish-user sh -c 'echo source \$HOME/.cargo/env >> $HOME/.cshrc' - sudo -u fish-user -s cmake -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCTEST_PARALLEL_LEVEL=1 .. - - sudo -u fish-user -s ninja -j 6 fish fish_tests - - sudo -u fish-user -s ninja fish_run_tests + - sudo -u fish-user sh -c '. 
$HOME/.cargo/env; ninja -j 6 fish fish_tests' + - sudo -u fish-user sh -c '. $HOME/.cargo/env; ninja fish_run_tests' only_if: $CIRRUS_REPO_OWNER == 'fish-shell' diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c39741e20..47b888e52 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,6 +16,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: SetupRust + uses: ATiltedTree/setup-rust@v1 + with: + rust-version: beta - name: Install deps run: | sudo apt install gettext libncurses5-dev libpcre2-dev python3-pip tmux @@ -42,6 +46,11 @@ jobs: steps: - uses: actions/checkout@v3 + - name: SetupRust + uses: ATiltedTree/setup-rust@v1 + with: + rust-version: beta + targets: "i686-unknown-linux-gnu" # setup-rust wants this space-separated - name: Install deps run: | sudo apt update @@ -53,10 +62,10 @@ jobs: CFLAGS: "-m32" run: | mkdir build && cd build - cmake -DFISH_USE_SYSTEM_PCRE2=OFF .. + cmake -DFISH_USE_SYSTEM_PCRE2=OFF -DRust_CARGO_TARGET=i686-unknown-linux-gnu .. 
- name: make run: | - make + make VERBOSE=1 - name: make test run: | make test @@ -67,6 +76,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: SetupRust + uses: ATiltedTree/setup-rust@v1 + with: + rust-version: beta - name: Install deps run: | sudo apt install gettext libncurses5-dev libpcre2-dev python3-pip tmux @@ -101,6 +114,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: SetupRust + uses: ATiltedTree/setup-rust@v1 + with: + rust-version: beta - name: Install deps run: | sudo apt install gettext libncurses5-dev libpcre2-dev python3-pip tmux @@ -127,6 +144,10 @@ jobs: steps: - uses: actions/checkout@v3 + - name: SetupRust + uses: ATiltedTree/setup-rust@v1 + with: + rust-version: beta - name: Install deps run: | sudo pip3 install pexpect diff --git a/.gitignore b/.gitignore index 917c3ac4d..52bf88e6b 100644 --- a/.gitignore +++ b/.gitignore @@ -89,3 +89,14 @@ __pycache__ /tags xcuserdata/ +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + diff --git a/CMakeLists.txt b/CMakeLists.txt index 08d7c54e3..ec2908a4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -24,6 +24,8 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}") endif() +include(cmake/Rust.cmake) + # Error out when linking statically, it doesn't work. if (CMAKE_EXE_LINKER_FLAGS MATCHES ".*-static.*") message(FATAL_ERROR "Fish does not support static linking") @@ -43,6 +45,9 @@ endif() # - address, because that occurs for our mkostemp check (weak-linking requires us to compare `&mkostemp == nullptr`). add_compile_options(-Wall -Wextra -Wno-comment -Wno-address) +# Get extra C++ files from Rust. 
+get_property(FISH_EXTRA_SOURCES TARGET fish-rust PROPERTY fish_extra_cpp_files) + if ((CMAKE_CXX_COMPILER_ID STREQUAL "Clang") OR (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")) add_compile_options(-Wunused-template -Wunused-local-typedef -Wunused-macros) endif() @@ -53,6 +58,9 @@ add_compile_options(-fno-exceptions) # Undefine NDEBUG to keep assert() in release builds. add_definitions(-UNDEBUG) +# Allow including Rust headers in normal (not bindgen) builds. +add_definitions(-DINCLUDE_RUST_HEADERS) + # Enable large files on GNU. add_definitions(-D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE @@ -95,32 +103,32 @@ set(FISH_BUILTIN_SRCS src/builtins/bg.cpp src/builtins/bind.cpp src/builtins/block.cpp src/builtins/builtin.cpp src/builtins/cd.cpp src/builtins/command.cpp src/builtins/commandline.cpp src/builtins/complete.cpp src/builtins/contains.cpp - src/builtins/disown.cpp src/builtins/echo.cpp src/builtins/emit.cpp - src/builtins/eval.cpp src/builtins/exit.cpp src/builtins/fg.cpp + src/builtins/disown.cpp + src/builtins/eval.cpp src/builtins/fg.cpp src/builtins/function.cpp src/builtins/functions.cpp src/builtins/history.cpp src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/printf.cpp src/builtins/path.cpp src/builtins/pwd.cpp src/builtins/random.cpp src/builtins/read.cpp - src/builtins/realpath.cpp src/builtins/return.cpp src/builtins/set.cpp + src/builtins/realpath.cpp src/builtins/set.cpp src/builtins/set_color.cpp src/builtins/source.cpp src/builtins/status.cpp src/builtins/string.cpp src/builtins/test.cpp src/builtins/type.cpp src/builtins/ulimit.cpp - src/builtins/wait.cpp) +) # List of other sources. 
set(FISH_SRCS src/ast.cpp src/abbrs.cpp src/autoload.cpp src/color.cpp src/common.cpp src/complete.cpp src/env.cpp src/env_dispatch.cpp src/env_universal_common.cpp src/event.cpp src/exec.cpp src/expand.cpp src/fallback.cpp src/fd_monitor.cpp src/fish_version.cpp - src/flog.cpp src/function.cpp src/future_feature_flags.cpp src/highlight.cpp + src/flog.cpp src/function.cpp src/highlight.cpp src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/io.cpp src/iothread.cpp src/job_group.cpp src/kill.cpp src/null_terminated_array.cpp src/operation_context.cpp src/output.cpp src/pager.cpp src/parse_execution.cpp src/parse_tree.cpp src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp - src/proc.cpp src/re.cpp src/reader.cpp src/redirection.cpp src/screen.cpp - src/signal.cpp src/termsize.cpp src/timer.cpp src/tinyexpr.cpp - src/tokenizer.cpp src/topic_monitor.cpp src/trace.cpp src/utf8.cpp src/util.cpp - src/wait_handle.cpp src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp - src/wutil.cpp src/fds.cpp + src/proc.cpp src/re.cpp src/reader.cpp src/screen.cpp + src/signals.cpp src/termsize.cpp src/tinyexpr.cpp + src/trace.cpp src/utf8.cpp + src/wait_handle.cpp src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp + src/wutil.cpp src/fds.cpp src/rustffi.cpp ) # Header files are just globbed. @@ -133,6 +141,11 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config_cmake.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) include_directories(${CMAKE_CURRENT_BINARY_DIR}) +# Pull in our src directory for headers searches, but only quoted ones. +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -iquote ${CMAKE_CURRENT_SOURCE_DIR}/src") + + + # Set up standard directories. 
include(GNUInstallDirs) add_definitions(-D_UNICODE=1 @@ -175,8 +188,10 @@ endfunction(FISH_LINK_DEPS_AND_SIGN) add_library(fishlib STATIC ${FISH_SRCS} ${FISH_BUILTIN_SRCS}) target_sources(fishlib PRIVATE ${FISH_HEADERS}) target_link_libraries(fishlib + fish-rust ${CURSES_LIBRARY} ${CURSES_EXTRA_LIBRARY} Threads::Threads ${CMAKE_DL_LIBS} - ${PCRE2_LIB} ${Intl_LIBRARIES} ${ATOMIC_LIBRARY}) + ${PCRE2_LIB} ${Intl_LIBRARIES} ${ATOMIC_LIBRARY} + "fish-rust") target_include_directories(fishlib PRIVATE ${CURSES_INCLUDE_DIRS}) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 6bfc24ba5..f19d2afce 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -420,8 +420,8 @@ Include What You Use You should not depend on symbols being visible to a ``*.cpp`` module from ``#include`` statements inside another header file. In other words if your module does ``#include "common.h"`` and that header does -``#include "signal.h"`` your module should not assume the sub-include is -present. It should instead directly ``#include "signal.h"`` if it needs +``#include "signals.h"`` your module should not assume the sub-include is +present. It should instead directly ``#include "signals.h"`` if it needs any symbol from that header. That makes the actual dependencies much clearer. It also makes it easy to modify the headers included by a specific header file without having to worry that will break any module diff --git a/README.rst b/README.rst index d13e9e5f0..3286fa055 100644 --- a/README.rst +++ b/README.rst @@ -148,6 +148,7 @@ Dependencies Compiling fish requires: +- Rust (version 1.67 or later) - a C++11 compiler (g++ 4.8 or later, or clang 3.3 or later) - CMake (version 3.5 or later) - a curses implementation such as ncurses (headers and libraries) diff --git a/cmake/Rust.cmake b/cmake/Rust.cmake new file mode 100644 index 000000000..fc1b8a3b9 --- /dev/null +++ b/cmake/Rust.cmake @@ -0,0 +1,49 @@ +include(FetchContent) + +# Don't let Corrosion's tests interfere with ours. 
+set(CORROSION_TESTS OFF CACHE BOOL "" FORCE) + +FetchContent_Declare( + Corrosion + GIT_REPOSITORY https://github.com/ridiculousfish/corrosion + GIT_TAG fish +) + +FetchContent_MakeAvailable(Corrosion) + +set(fish_rust_target "fish-rust") + +set(fish_autocxx_gen_dir "${CMAKE_BINARY_DIR}/fish-autocxx-gen/") + +corrosion_import_crate( + MANIFEST_PATH "${CMAKE_SOURCE_DIR}/fish-rust/Cargo.toml" + FEATURES "fish-ffi-tests" +) + +# We need the build dir because cxx puts our headers in there. +# Corrosion doesn't expose the build dir, so poke where we shouldn't. +if (Rust_CARGO_TARGET) + set(rust_target_dir "${CMAKE_BINARY_DIR}/cargo/build/${_CORROSION_RUST_CARGO_TARGET}") +else() + set(rust_target_dir "${CMAKE_BINARY_DIR}/cargo/build/${_CORROSION_RUST_CARGO_HOST_TARGET}") + corrosion_set_hostbuild(${fish_rust_target}) +endif() + +# Tell Cargo where our build directory is so it can find config.h. +corrosion_set_env_vars(${fish_rust_target} "FISH_BUILD_DIR=${CMAKE_BINARY_DIR}" "FISH_AUTOCXX_GEN_DIR=${fish_autocxx_gen_dir}" "FISH_RUST_TARGET_DIR=${rust_target_dir}") + +target_include_directories(${fish_rust_target} INTERFACE + "${rust_target_dir}/cxxbridge/${fish_rust_target}/src/" + "${fish_autocxx_gen_dir}/include/" +) + +# Tell fish what extra C++ files to compile. +define_property( + TARGET PROPERTY fish_extra_cpp_files + BRIEF_DOCS "Extra C++ files to compile for fish." + FULL_DOCS "Extra C++ files to compile for fish." +) + +set_property(TARGET ${fish_rust_target} PROPERTY fish_extra_cpp_files + "${fish_autocxx_gen_dir}/cxx/gen0.cxx" +) diff --git a/cmake/Tests.cmake b/cmake/Tests.cmake index ed745b4ce..b8b511ded 100644 --- a/cmake/Tests.cmake +++ b/cmake/Tests.cmake @@ -175,3 +175,19 @@ foreach(PEXPECT ${PEXPECTS}) set_tests_properties(${PEXPECT} PROPERTIES ENVIRONMENT FISH_FORCE_COLOR=1) add_test_target("${PEXPECT}") endforeach(PEXPECT) + +# Rust stuff. 
+add_test( + NAME "cargo-test" + COMMAND cargo test + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/fish-rust" +) +set_tests_properties("cargo-test" PROPERTIES SKIP_RETURN_CODE ${SKIP_RETURN_CODE}) +add_test_target("cargo-test") + +add_test( + NAME "cargo-test-widestring" + COMMAND cargo test + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/fish-rust/widestring-suffix/" +) +add_test_target("cargo-test-widestring") diff --git a/debian/control b/debian/control index 401a51de3..ed1b486dc 100644 --- a/debian/control +++ b/debian/control @@ -6,7 +6,7 @@ Uploaders: David Adam # Debhelper should be bumped to >= 10 once Ubuntu Xenial is no longer supported Build-Depends: debhelper (>= 9.20160115), libncurses5-dev, cmake (>= 3.5.0), gettext, libpcre2-dev, # Test dependencies - locales-all, python3 + locales-all, python3, rust (>= 1.67) | rust-mozilla (>= 1.67) Standards-Version: 4.1.5 Homepage: https://fishshell.com/ Vcs-Git: https://github.com/fish-shell/fish-shell.git diff --git a/doc_internal/fish-riir-plan.md b/doc_internal/fish-riir-plan.md new file mode 100644 index 000000000..32572ac34 --- /dev/null +++ b/doc_internal/fish-riir-plan.md @@ -0,0 +1,79 @@ +This is a proposed port of fish-shell from C++ to Rust, and from CMake to cargo or related. This document is high level - see the [Development Guide] for more details. + +## Why Port + +- Gain access to more contributors and enable easier contributions. C++ is becoming a legacy language. +- Free us from the annoyances of C++/CMake, and old toolchains. +- Ensure fish continues to be perceived as modern and relevant. +- Unlock concurrent mode (see below). + +## Why Rust + +- Rust is a systems programming language with broad platform support, a large community, and a relatively high probability of still being relevant in a decade. +- Rust has a unique strength in its thread safety features, which is the missing piece to enable concurrent mode - see below. 
+- Other languages considered: + - Java, Python and the scripting family are ruled out for startup latency and memory usage reasons. + - Go would be an awkward fit. fork is [quite the problem](https://stackoverflow.com/questions/28370646/how-do-i-fork-a-go-process/28371586#28371586) in Go. + - Other system languages (D, Nim, Zig...) are too niche: fewer contributors, higher risk of the language becoming irrelevant. + +## Risks + +- Large amount of work with possible introduction of new bugs. +- Long period of complicated builds. +- Existing contributors will have to learn Rust. +- As of yet unknown compatibility story for Tier 2+ platforms (Cygwin, etc). + +## Approach + +We will do an **incremental port** in the span of one release. We will have a period of using both C++ and Rust, and both cargo and CMake, leveraging FFI tools (see below). + +The work will **proceed on master**: no long-lived branches. Tests and CI continue to pass at every commit for recent Linux and Mac. Centos7, \*BSD, etc may be temporarily disabled if they prove problematic. + +The Rust code will initially resemble the replaced C++. Fidelity to existing code is more important than Rust idiomaticity, to aid review and bisecting. But don't take this to extremes - use judgement. + +The port will proceed "outside in." We'll start with leaf components (e.g. builtins) and proceed towards the core. Some components will have both a Rust and C++ implementation (e.g. FLOG), in other cases we'll change the existing C++ to invoke the new Rust implementations (builtins). + +After porting the C++, we'll replace CMake. + +We will continue to use wide chars, locales, gettext, printf format strings, and PCRE2. We will not change the fish scripting language at all. We will _not_ use this as an opportunity to fix existing design flaws, with a few carefully chosen exceptions. See [Strings](#strings). + +We will not use tokio, serde, async, or other fancy Rust frameworks initially. 
+ +### FFI + +Rust/C++ interop will use [autocxx](https://github.com/google/autocxx), [Cxx](https://cxx.rs), and possibly [bindgen](https://rust-lang.github.io/rust-bindgen/). I've forked these for fish (see the [Development Guide]). Once the port is done, we will stop using them, except perhaps bindgen for PCRE2. + +We will use [corrosion](https://github.com/corrosion-rs/corrosion) for CMake integration. + +Inefficiencies (e.g. extra string copying) at the FFI layer are fine, since it will all get thrown away. + +Tests can stay in fish_tests.cpp or be moved into Rust .rs files; either is fine. + +### Strings + +Rust's `String` / `&str` types cannot represent non-UTF8 filenames or data using the default encoding scheme. That's why all string conversions must go through fish's encoding scheme (using the private-use area to encode invalid sequences). For example, fish cannot use `File::open` with a `&str` because the decoding will be incorrect. + +So instead of `String`, fish will use its own string type, and manage encoding and decoding as it does today. However we will make some specific changes: + +1. Drop the nul-terminated requirement. When passing `const wchar_t*` back to C++, we will allocate and copy into a nul-terminated buffer. +2. Drop support for 16-bit wchar. fish will use UTF32 on all platforms, and manage conversions itself. + +After the port we can consider moving to UTF-8, for memory usage reasons. + +See the [Rust Development Guide][Development Guide] for more on strings. + +### Thread Safety + +Allowing [background functions](https://github.com/fish-shell/fish-shell/issues/238) and concurrent functions has been a goal for many years. I have been nursing [a long-lived branch](https://github.com/ridiculousfish/fish-shell/tree/concurrent_even_simpler) which allows full threaded execution. 
But though the changes are small, I have been reluctant to propose them, because they will make reasoning about the shell internals too complex: it is difficult in C++ to check and enforce what crosses thread boundaries. + +This is Rust's bread and butter: we will encode thread requirements into our types, making it explicit and compiler-checked, via Send and Sync. Rust will allow turning on concurrent mode in a safe way, with a manageable increase in complexity, finally enabling this feature. + +## Timeline + +Handwaving, 6 months? Frankly unknown - there are 102 remaining .cpp files of various lengths. It'll go faster as we get better at it. Peter (ridiculous_fish) is motivated to work on this, other current contributors have some Rust as well, and we may also get new contributors from the Rust community. Part of the point is to make contribution easier. + +## Links + +- [Packaging Rust projects](https://wiki.archlinux.org/title/Rust_package_guidelines) from Arch Linux + +[Development Guide]: rust-devel.md diff --git a/doc_internal/rust-devel.md b/doc_internal/rust-devel.md new file mode 100644 index 000000000..60414f16f --- /dev/null +++ b/doc_internal/rust-devel.md @@ -0,0 +1,172 @@ +# fish-shell Rust Development Guide + +This describes how to get started building fish-shell in its partial Rust state, and how to contribute to the port. + +## Overview + +fish is in the process of transitioning from C++ to Rust. The fish project has a Rust crate embedded at path `fish-rust`. This crate builds a Rust library `libfish_rust.a` which is linked with the C++ `libfish.a`. Existing C++ code will be incrementally migrated to this crate; then CMake will be replaced with cargo and other Rust-native tooling. + +Important tools used during this transition: + +1. [Corrosion](https://github.com/corrosion-rs/corrosion) to invoke cargo from CMake. +2. [cxx](http://cxx.rs) for basic C++ <-> Rust interop. +3. [autocxx](https://google.github.io/autocxx/) for using C++ types in Rust. 
+ +We use forks of the last two - see the [FFI section](#ffi) below. No special action is required to obtain these packages. They're downloaded by cargo. + +## Building + +### Build Dependencies + +fish-shell currently depends on Rust 1.67 or later. To install Rust, follow https://rustup.rs. + +### Build via CMake + +It is recommended to build inside `fish-shell/build`. This will make it easier for Rust to find the `config.h` file. + +Build via CMake as normal (use any generator, here we use Ninja): + +```shell +$ cd fish-shell +$ mkdir build && cd build +$ cmake -G Ninja .. +$ ninja +``` + +This will create the usual fish executables. + +### Build just libfish_rust.a with Cargo + +The directory `fish-rust` contains the Rust sources. These require that CMake has been run to produce `config.h` which is necessary for autocxx to succeed. + +Follow the "Build via CMake" steps above, and then: + +```shell +$ cd fish-shell/fish-rust +$ cargo build +``` + +This will build only the library, not a full working fish, but it allows faster iteration for Rust development. That is, after running `cmake` you can open the `fish-rust` as the root of a Rust crate, and tools like rust-analyzer will work. + +## Development + +The basic development loop for this port: + +1. Pick a .cpp (or in some cases .h) file to port, say `util.cpp`. +2. Add the corresponding `util.rs` file to `fish-rust/`. +3. Reimplement it in Rust, along with its dependencies as needed. Match the existing C++ code where practical, including propagating any relevant comments. + - Do this even if it results in less idiomatic Rust, but avoid being super-dogmatic either way. + - One technique is to paste the C++ into the Rust code, commented out, and go line by line. +4. Decide whether any existing C++ callers should invoke the Rust implementation, or whether we should keep the C++ one. + - Utility functions may have both a Rust and C++ implementation. An example is `FLOG` where interop is too hard. 
+ - Major components (e.g. builtin implementations) should _not_ be duplicated; instead the Rust should call C++ or vice-versa. + +You will likely run into limitations of [`autocxx`](https://google.github.io/autocxx/) and to a lesser extent [`cxx`](https://cxx.rs/). See the [FFI sections](#ffi) below. + +## Type Mapping + +### Constants & Type Aliases + +The FFI does not support constants (`#define` or `static const`) or type aliases (`typedef`, `using`). Duplicate them using their Rust equivalent (`pub const` and `type`/`struct`/`enum`). + +### Non-POD types + +Many types cannot currently be passed across the language boundary by value or occur in shared structs. As a workaround, use references, raw pointers or smart pointers (`cxx` provides `SharedPtr` and `UniquePtr`). Try to keep workarounds on the C++ side and the FFI layer of the Rust code. This ensures we will get rid of the workarounds as we peel off the FFI layer. + +### Strings + +Fish will mostly _not_ use Rust's `String/&str` types as these cannot represent non-UTF8 data using the default encoding. + +fish's primary string types will come from the [`widestring` crate](https://docs.rs/widestring). The two main string types are `WString` and `&wstr`, which are renamed [Utf32String](https://docs.rs/widestring/latest/widestring/utfstring/struct.Utf32String.html) and [Utf32Str](https://docs.rs/widestring/latest/widestring/utfstr/struct.Utf32Str.html). `WString` is an owned, heap-allocated UTF32 string, `&wstr` a borrowed UTF32 slice. + +In general, follow this mapping when porting from C++: + +- `wcstring` -> `WString` +- `const wcstring &` -> `&wstr` +- `const wchar_t *` -> `&wstr` + +None of the Rust string types are nul-terminated. We're taking this opportunity to drop the nul-terminated aspect of wide string handling. 
+ +#### Creating strings + +One may create a `&wstr` from a string literal using the `wchar::L!` macro: + +```rust +use crate::wchar::{wstr, L!} + +fn get_shell_name() -> &'static wstr { + L!("fish") +} +``` + +There is also a `widestrs` proc-macro which enables L as a _suffix_, to reduce the noise. This can be applied to any block, including modules and individual functions: + +```rust +use crate::wchar::{wstr, widestrs} + +#[widestrs] +fn get_shell_name() -> &'static wstr { + "fish"L // equivalent to L!("fish") +} +``` + +### Strings for FFI + +`WString` and `&wstr` are the common strings used by Rust components. At the FFI boundary there are some additional strings for interop. _All of these are temporary for the duration of the port._ + +- `CxxWString` is the Rust binding of `std::wstring`. It is the wide-string analog to [`CxxString`](https://cxx.rs/binding/cxxstring.html) and is [added in our fork of cxx](https://github.com/ridiculousfish/cxx/blob/fish/src/cxx_wstring.rs). This is useful for functions which return e.g. `const wcstring &`. +- `W0String` is renamed [U32CString](https://docs.rs/widestring/latest/widestring/ucstring/struct.U32CString.html). This is basically `WString` except it _is_ nul-terminated. This is useful for getting a nul-terminated `const wchar_t *` to pass to C++ implementations. +- `wcharz_t` is an annoying C++ struct which merely wraps a `const wchar_t *`, used for passing these pointers from C++ to Rust. We would prefer to use `const wchar_t *` directly but `autocxx` refuses to generate bindings for types such as `std::vector` so we wrap it in this silly struct. + +Note C++ `wchar_t`, Rust `char`, and `u32` are effectively interchangeable: you can cast pointers to them back and forth (except we check upon u32->char conversion). However be aware of which types are nul-terminated. + +These types should be confined to the FFI modules, in particular `wchar_ffi`. They should not "leak" into other modules. See the `wchar_ffi` module. 
+ +### Format strings + +Rust's builtin `std::fmt` modules do not accept runtime-provided format strings, so we mostly won't use them, except perhaps for FLOG / other non-translated text. + +Instead we'll continue to use printf-style strings, with a Rust printf implementation. + +### Vectors + +See [`Vec`](https://cxx.rs/binding/vec.html) and [`CxxVector`](https://cxx.rs/binding/cxxvector.html). + +In many cases, `autocxx` refuses to allow vectors of certain types. For example, autocxx supports `std::vector` and `std::shared_ptr` but NOT `std::vector<std::shared_ptr<T>>`. To work around this one can create a helper (pointer, length) struct. Example: + +```cpp +struct RustFFIJobList { + std::shared_ptr<job_t> *jobs; + size_t count; +}; +``` + +This is just a POD (plain old data) so autocxx can generate bindings for it. Then it is trivial to convert it to a Rust slice: + +``` +pub fn get_jobs(ffi_jobs: &ffi::RustFFIJobList) -> &[SharedPtr<job_t>] { + unsafe { slice::from_raw_parts(ffi_jobs.jobs, ffi_jobs.count) } +} +``` + +Another workaround is to define a struct that contains the shared pointer, and create a vector of that struct. + +## Development Tooling + +The [autocxx guidance](https://google.github.io/autocxx/workflow.html#how-can-i-see-what-bindings-autocxx-has-generated) is helpful: + +1. Install cargo expand (`cargo install cargo-expand`). Then you can use `cargo expand` to see the generated Rust bindings for C++. In particular this is useful for seeing failed expansions for C++ types that autocxx cannot handle. +2. In rust-analyzer, enable Proc Macro and Proc Macro Attributes. + +## FFI + +The boundary between Rust and C++ is referred to as the Foreign Function Interface, or FFI. + +`autocxx` and `cxx` both are designed for long-term interop: C++ and Rust coexisting for years. To this end, both emphasize safety: requiring lots of `unsafe`, `Pin`, etc. + +fish plans to use them only temporarily, with a focus on getting things working. 
To this end, both cxx and autocxx have been forked to support fish: + +1. Relax the requirement that all functions taking pointers are `unsafe` (this just added noise). +2. Add support for `wchar_t` as a recognized type, and `CxxWString` analogous to `CxxString`. + +See the `Cargo.toml` file for the locations of the forks. diff --git a/fish-rust/Cargo.lock b/fish-rust/Cargo.lock new file mode 100644 index 000000000..87084289d --- /dev/null +++ b/fish-rust/Cargo.lock @@ -0,0 +1,1035 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "addr2line" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + +[[package]] +name = "aquamarine" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a941c39708478e8eea39243b5983f1c42d2717b3620ee91f4a52115fd02ac43f" +dependencies = [ + "itertools 0.9.0", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "autocxx" +version = "0.23.1" +source = "git+https://github.com/ridiculousfish/autocxx?branch=fish#311485f38289a352dcaddaad7f819f93f6e7df99" +dependencies = [ + "aquamarine", + "autocxx-macro", + "cxx", + "moveit", +] + +[[package]] +name = "autocxx-bindgen" +version = "0.62.0" +source = "git+https://github.com/ridiculousfish/autocxx-bindgen?branch=fish#a229d3473bd90d2d10fc61a244408cfc1958934a" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "itertools 0.10.5", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] +name = "autocxx-build" +version = "0.23.1" +source = "git+https://github.com/ridiculousfish/autocxx?branch=fish#311485f38289a352dcaddaad7f819f93f6e7df99" +dependencies = [ + "autocxx-engine", + "env_logger", + "indexmap", + "syn", +] + +[[package]] +name = "autocxx-engine" +version = "0.23.1" +source = "git+https://github.com/ridiculousfish/autocxx?branch=fish#311485f38289a352dcaddaad7f819f93f6e7df99" +dependencies = [ + "aquamarine", + "autocxx-bindgen", + "autocxx-parser", + "cc", + "cxx-gen", + "indexmap", + "indoc", + "itertools 0.10.5", + "log", + "miette", + "once_cell", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustversion", + "serde_json", + "strum_macros", + "syn", + "tempfile", + "thiserror", + "version_check", +] + +[[package]] +name = "autocxx-macro" +version = "0.23.1" +source = "git+https://github.com/ridiculousfish/autocxx?branch=fish#311485f38289a352dcaddaad7f819f93f6e7df99" +dependencies = [ + "autocxx-parser", + "proc-macro-error", + "proc-macro2", + 
"quote", + "syn", +] + +[[package]] +name = "autocxx-parser" +version = "0.23.1" +source = "git+https://github.com/ridiculousfish/autocxx?branch=fish#311485f38289a352dcaddaad7f819f93f6e7df99" +dependencies = [ + "indexmap", + "itertools 0.10.5", + "log", + "once_cell", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn", + "thiserror", +] + +[[package]] +name = "backtrace" +version = "0.3.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cc" +version = "1.0.79" +source = "git+https://github.com/mqudsi/cc-rs?branch=fish#cdc3a376eb0f56c2fb2cf640cc0e9192feaa621b" + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa2e27ae6ab525c3d369ded447057bca5438d86dc3a68f6faafb8269ba82ebf3" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + +[[package]] +name = "ctor" +version = 
"0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +dependencies = [ + "quote", + "syn", +] + +[[package]] +name = "cxx" +version = "1.0.81" +source = "git+https://github.com/ridiculousfish/cxx?branch=fish#24d1bac1da6abbc2b483760358676e95262aca63" +dependencies = [ + "cc", + "cxxbridge-flags", + "cxxbridge-macro", + "link-cplusplus", + "widestring", +] + +[[package]] +name = "cxx-build" +version = "1.0.81" +source = "git+https://github.com/ridiculousfish/cxx?branch=fish#24d1bac1da6abbc2b483760358676e95262aca63" +dependencies = [ + "cc", + "codespan-reporting", + "once_cell", + "proc-macro2", + "quote", + "scratch", + "syn", +] + +[[package]] +name = "cxx-gen" +version = "0.7.81" +source = "git+https://github.com/ridiculousfish/cxx?branch=fish#24d1bac1da6abbc2b483760358676e95262aca63" +dependencies = [ + "codespan-reporting", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "cxxbridge-flags" +version = "1.0.81" +source = "git+https://github.com/ridiculousfish/cxx?branch=fish#24d1bac1da6abbc2b483760358676e95262aca63" + +[[package]] +name = "cxxbridge-macro" +version = "1.0.81" +source = "git+https://github.com/ridiculousfish/cxx?branch=fish#24d1bac1da6abbc2b483760358676e95262aca63" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "either" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "env_logger" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "fastrand" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a407cfaa3385c4ae6b23e84623d48c2798d06e3e6a1878f7f59f17b3f86499" +dependencies = [ + "instant", +] + +[[package]] +name = "fish-rust" +version = "0.1.0" +dependencies = [ + "autocxx", + "autocxx-build", + "cxx", + "cxx-build", + "cxx-gen", + "errno", + "inventory", + "libc", + "miette", + "nix", + "num-traits", + "once_cell", + "unixstring", + "widestring", + "widestring-suffix", +] + +[[package]] +name = "getrandom" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "ghost" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41973d4c45f7a35af8753ba3457cc99d406d863941fd7f52663cff54a5ab99b3" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "gimli" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "221996f774192f0f718773def8201c4ae31f02616a54ccfc2d358bb0e5cefdec" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 
+dependencies = [ + "ahash", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "indexmap" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +dependencies = [ + "autocfg", + "hashbrown", + "serde", +] + +[[package]] +name = "indoc" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "inventory" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16fe3b35d64bd1f72917f06425e7573a2f63f74f42c8f56e53ea6826dde3a2b5" +dependencies = [ + "ctor", + "ghost", +] + +[[package]] +name = "is_ci" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "616cde7c720bb2bb5824a224687d8f77bfd38922027f01d825cd7453be5099fb" + +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +dependencies = [ + 
"either", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "link-cplusplus" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" +dependencies = [ + "cc", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "miette" +version = "5.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4afd9b301defa984bbdbe112b4763e093ed191750a0d914a78c1106b2d0fe703" +dependencies = [ + "atty", + "backtrace", + "miette-derive", + "once_cell", + "owo-colors", + "supports-color", + "supports-hyperlinks", + "supports-unicode", + "terminal_size", + "textwrap", + "thiserror", + "unicode-width", +] + +[[package]] +name = "miette-derive" +version = "5.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97c2401ab7ac5282ca5c8b518a87635b1a93762b0b90b9990c509888eeccba29" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "miniz_oxide" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa" +dependencies = [ + "adler", +] + +[[package]] +name = "moveit" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7d756ffe4e38013507d35bf726a93fcdae2cae043ab5ce477f13857a335030d" +dependencies = [ + "cxx", +] + +[[package]] +name = "nix" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" +dependencies = [ + "autocfg", + "bitflags", + "cfg-if", + "libc", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "object" +version = "0.30.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" + +[[package]] +name = "owo-colors" +version = "3.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "prettyplease" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e97e3215779627f01ee256d2fad52f3d95e8e1c11e9fc6fd08f7cd455d5d5c78" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2" +dependencies = [ + 
"unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustversion" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5583e89e108996506031660fe09baa5011b9dd0341b89029313006d1fb508d70" + +[[package]] +name = "ryu" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde" + 
+[[package]] +name = "scratch" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddccb15bcce173023b3fedd9436f882a0739b8dfb45e4f6b6002bee5929f61b2" + +[[package]] +name = "serde" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.152" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "shlex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" + +[[package]] +name = "smawk" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043" + +[[package]] +name = "strum_macros" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "supports-color" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ba6faf2ca7ee42fdd458f4347ae0a9bd6bcc445ad7cb57ad82b383f18870d6f" +dependencies = [ + "atty", + "is_ci", +] + +[[package]] +name = "supports-hyperlinks" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "590b34f7c5f01ecc9d78dba4b3f445f31df750a67621cf31626f3b7441ce6406" +dependencies = [ + "atty", +] + +[[package]] +name = "supports-unicode" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8b945e45b417b125a8ec51f1b7df2f8df7920367700d1f98aedd21e5735f8b2" +dependencies = [ + "atty", +] + +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "termcolor" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "terminal_size" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "633c1a546cee861a1a6d0dc69ebeca693bf4296661ba7852b9d21d159e0506df" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "textwrap" +version = "0.15.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7b3e525a49ec206798b40326a44121291b530c963cfb01018f63e135bac543d" +dependencies = [ + "smawk", + "unicode-linebreak", + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.38" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + +[[package]] +name = "unicode-linebreak" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5faade31a542b8b35855fff6e8def199853b2da8da256da52f52f1316ee3137" +dependencies = [ + "hashbrown", + "regex", +] + +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + +[[package]] +name = "unixstring" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "366c5c5657cbe7a684b3476acc7b96d4087e953bf750b1eab4dfbffeda32b2f3" +dependencies = [ + "libc", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "which" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2441c784c52b289a054b7201fc93253e288f094e2f4be9058343127c4226a269" +dependencies = [ + "either", + "libc", + "once_cell", +] + +[[package]] +name = "widestring" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" + +[[package]] +name = 
"widestring-suffix" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/fish-rust/Cargo.toml b/fish-rust/Cargo.toml new file mode 100644 index 000000000..df8206419 --- /dev/null +++ b/fish-rust/Cargo.toml @@ -0,0 +1,54 @@ +[package] +name = "fish-rust" +version = "0.1.0" +edition = "2021" + + +[dependencies] +widestring-suffix = { path = "./widestring-suffix/" } + +autocxx = "0.23.1" +cxx = "1.0" +errno = "0.2.8" +inventory = { version = "0.3.3", optional = true} +libc = "0.2.137" +nix = { version = "0.25.0", default-features = false, features = [] } +num-traits = "0.2.15" +once_cell = "1.17.0" +unixstring = "0.2.7" +widestring = "1.0.2" + +[build-dependencies] +autocxx-build = "0.23.1" +cxx-build = { git = "https://github.com/ridiculousfish/cxx", branch = "fish" } +cxx-gen = { git = "https://github.com/ridiculousfish/cxx", branch = "fish" } +miette = { version = "5", features = ["fancy"] } + +[lib] +crate-type=["staticlib"] + +[features] +# The fish-ffi-tests feature causes tests to be built 
which need to use the FFI. +# These tests are run by fish_tests(). +default = ["fish-ffi-tests"] +fish-ffi-tests = ["inventory"] + +[patch.crates-io] +cc = { git = "https://github.com/mqudsi/cc-rs", branch = "fish" } +cxx = { git = "https://github.com/ridiculousfish/cxx", branch = "fish" } +cxx-gen = { git = "https://github.com/ridiculousfish/cxx", branch = "fish" } +autocxx = { git = "https://github.com/ridiculousfish/autocxx", branch = "fish" } +autocxx-build = { git = "https://github.com/ridiculousfish/autocxx", branch = "fish" } +autocxx-bindgen = { git = "https://github.com/ridiculousfish/autocxx-bindgen", branch = "fish" } + +[patch.'https://github.com/ridiculousfish/cxx'] +cc = { git = "https://github.com/mqudsi/cc-rs", branch = "fish" } + +[patch.'https://github.com/ridiculousfish/autocxx'] +cc = { git = "https://github.com/mqudsi/cc-rs", branch = "fish" } + +#cxx = { path = "../../cxx" } +#cxx-gen = { path="../../cxx/gen/lib" } +#autocxx = { path = "../../autocxx" } +#autocxx-build = { path = "../../autocxx/gen/build" } +#autocxx-bindgen = { path = "../../autocxx-bindgen" } diff --git a/fish-rust/build.rs b/fish-rust/build.rs new file mode 100644 index 000000000..cef14f542 --- /dev/null +++ b/fish-rust/build.rs @@ -0,0 +1,55 @@ +fn main() -> miette::Result<()> { + let rust_dir = std::env::var("CARGO_MANIFEST_DIR").expect("Env var CARGO_MANIFEST_DIR missing"); + let target_dir = + std::env::var("FISH_RUST_TARGET_DIR").unwrap_or(format!("{}/{}", rust_dir, "target/")); + let fish_src_dir = format!("{}/{}", rust_dir, "../src/"); + + // Where cxx emits its header. + let cxx_include_dir = format!("{}/{}", target_dir, "cxxbridge/rust/"); + + // If FISH_BUILD_DIR is given by CMake, then use it; otherwise assume it's at ../build. + let fish_build_dir = + std::env::var("FISH_BUILD_DIR").unwrap_or(format!("{}/{}", rust_dir, "../build/")); + + // Where autocxx should put its stuff. 
+ let autocxx_gen_dir = std::env::var("FISH_AUTOCXX_GEN_DIR") + .unwrap_or(format!("{}/{}", fish_build_dir, "fish-autocxx-gen/")); + + // Emit cxx junk. + // This allows "Rust to be used from C++" + // This must come before autocxx so that cxx can emit its cxx.h header. + let source_files = vec![ + "src/fd_readable_set.rs", + "src/ffi_init.rs", + "src/ffi_tests.rs", + "src/future_feature_flags.rs", + "src/parse_constants.rs", + "src/redirection.rs", + "src/smoke.rs", + "src/timer.rs", + "src/tokenizer.rs", + "src/topic_monitor.rs", + "src/util.rs", + "src/builtins/shared.rs", + ]; + cxx_build::bridges(&source_files) + .flag_if_supported("-std=c++11") + .include(&fish_src_dir) + .include(&fish_build_dir) // For config.h + .include(&cxx_include_dir) // For cxx.h + .compile("fish-rust"); + + // Emit autocxx junk. + // This allows "C++ to be used from Rust." + let include_paths = [&fish_src_dir, &fish_build_dir, &cxx_include_dir]; + let mut b = autocxx_build::Builder::new("src/ffi.rs", include_paths) + .custom_gendir(autocxx_gen_dir.into()) + .build()?; + b.flag_if_supported("-std=c++11") + .compile("fish-rust-autocxx"); + for file in source_files { + println!("cargo:rerun-if-changed={file}"); + } + + Ok(()) +} diff --git a/fish-rust/src/builtins/echo.rs b/fish-rust/src/builtins/echo.rs new file mode 100644 index 000000000..9b251cd87 --- /dev/null +++ b/fish-rust/src/builtins/echo.rs @@ -0,0 +1,232 @@ +//! Implementation of the echo builtin. 
+ +use libc::c_int; + +use super::shared::{builtin_missing_argument, io_streams_t, STATUS_CMD_OK, STATUS_INVALID_ARGS}; +use crate::ffi::parser_t; +use crate::wchar::{wchar_literal_byte, wstr, WString, L}; +use crate::wgetopt::{wgetopter_t, woption}; + +#[derive(Debug, Clone, Copy)] +struct Options { + print_newline: bool, + print_spaces: bool, + interpret_special_chars: bool, +} + +impl Default for Options { + fn default() -> Self { + Self { + print_newline: true, + print_spaces: true, + interpret_special_chars: false, + } + } +} + +fn parse_options( + args: &mut [&wstr], + parser: &mut parser_t, + streams: &mut io_streams_t, +) -> Result<(Options, usize), Option> { + let cmd = args[0]; + + const SHORT_OPTS: &wstr = L!("+:Eens"); + const LONG_OPTS: &[woption] = &[]; + + let mut opts = Options::default(); + + let mut oldopts = opts; + let mut oldoptind = 0; + + let mut w = wgetopter_t::new(SHORT_OPTS, LONG_OPTS, args); + while let Some(c) = w.wgetopt_long() { + match c { + 'n' => opts.print_newline = false, + 'e' => opts.interpret_special_chars = true, + 's' => opts.print_spaces = false, + 'E' => opts.interpret_special_chars = false, + ':' => { + builtin_missing_argument(parser, streams, cmd, args[w.woptind - 1], true); + return Err(STATUS_INVALID_ARGS); + } + '?' => { + return Ok((oldopts, w.woptind - 1)); + } + _ => { + panic!("unexpected retval from wgetopter::wgetopt_long()"); + } + } + + // Super cheesy: We keep an old copy of the option state around, + // so we can revert it in case we get an argument like + // "-n foo". + // We need to keep it one out-of-date so we can ignore the *last* option. 
+ // (this might be an issue in wgetopt, but that's a whole other can of worms + // and really only occurs with our weird "put it back" option parsing) + if w.woptind == oldoptind + 2 { + oldopts = opts; + oldoptind = w.woptind; + } + } + + Ok((opts, w.woptind)) +} + +/// Parse a numeric escape sequence in `s`, returning the number of characters consumed and the +/// resulting value. Supported escape sequences: +/// +/// - `0nnn`: octal value, zero to three digits +/// - `nnn`: octal value, one to three digits +/// - `xhh`: hex value, one to two digits +fn parse_numeric_sequence(chars: I) -> Option<(usize, u8)> +where + I: IntoIterator, +{ + let mut chars = chars.into_iter().peekable(); + + // the first character of the numeric part of the sequence + let mut start = 0; + + let mut base: u8 = 0; + let mut max_digits = 0; + + let first = *chars.peek()?; + if first.is_digit(8) { + // Octal escape + base = 8; + + // If the first digit is a 0, we allow four digits (including that zero); otherwise, we + // allow 3. + max_digits = if first == '0' { 4 } else { 3 }; + } else if first == 'x' { + // Hex escape + base = 16; + max_digits = 2; + + // Skip the x + start = 1; + }; + + if base == 0 { + return None; + } + + let mut val = 0; + let mut consumed = start; + for digit in chars + .skip(start) + .take(max_digits) + .map_while(|c| c.to_digit(base.into())) + { + // base is either 8 or 16, so digit can never be >255 + let digit = u8::try_from(digit).unwrap(); + + val = val * base + digit; + + consumed += 1; + } + + // We succeeded if we consumed at least one digit. + if consumed > 0 { + Some((consumed, val)) + } else { + None + } +} + +/// The echo builtin. +/// +/// Bash only respects `-n` if it's the first argument. We'll do the same. We also support a new, +/// fish specific, option `-s` to mean "no spaces". 
+pub fn echo( + parser: &mut parser_t, + streams: &mut io_streams_t, + args: &mut [&wstr], +) -> Option { + let (opts, optind) = match parse_options(args, parser, streams) { + Ok((opts, optind)) => (opts, optind), + Err(err @ Some(_)) if err != STATUS_CMD_OK => return err, + Err(err) => panic!("Illogical exit code from parse_options(): {err:?}"), + }; + + // The special character \c can be used to indicate no more output. + let mut output_stopped = false; + + // We buffer output so we can write in one go, + // this matters when writing to an fd. + let mut out = WString::new(); + let args_to_echo = &args[optind..]; + 'outer: for (idx, arg) in args_to_echo.iter().enumerate() { + if opts.print_spaces && idx > 0 { + out.push(' '); + } + + let mut chars = arg.chars().peekable(); + while let Some(c) = chars.next() { + if !opts.interpret_special_chars || c != '\\' { + // Not an escape. + out.push(c); + continue; + } + + let Some(next_char) = chars.peek() else { + // Incomplete escape sequence is echoed verbatim + out.push('\\'); + break; + }; + + // Most escapes consume one character in addition to the backslash; the numeric + // sequences may consume more, while an unrecognized escape sequence consumes none. + let mut consumed = 1; + + let escaped = match next_char { + 'a' => '\x07', + 'b' => '\x08', + 'e' => '\x1B', + 'f' => '\x0C', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + 'v' => '\x0B', + '\\' => '\\', + 'c' => { + output_stopped = true; + break 'outer; + } + _ => { + // Octal and hex escape sequences. + if let Some((digits_consumed, narrow_val)) = + parse_numeric_sequence(chars.clone()) + { + consumed = digits_consumed; + // The narrow_val is a literal byte that we want to output (#1894). + wchar_literal_byte(narrow_val) + } else { + consumed = 0; + '\\' + } + } + }; + + // Skip over characters that were part of this escape sequence (after the backslash + // that was consumed by the `while` loop). 
+ // TODO: `Iterator::advance_by()`: https://github.com/rust-lang/rust/issues/77404 + for _ in 0..consumed { + let _ = chars.next(); + } + + out.push(escaped); + } + } + + if opts.print_newline && !output_stopped { + out.push('\n'); + } + + if !out.is_empty() { + streams.out.append(out); + } + + STATUS_CMD_OK +} diff --git a/fish-rust/src/builtins/emit.rs b/fish-rust/src/builtins/emit.rs new file mode 100644 index 000000000..83bf55d8c --- /dev/null +++ b/fish-rust/src/builtins/emit.rs @@ -0,0 +1,52 @@ +use libc::c_int; +use widestring_suffix::widestrs; + +use super::shared::{ + builtin_print_help, io_streams_t, HelpOnlyCmdOpts, STATUS_CMD_OK, STATUS_INVALID_ARGS, +}; +use crate::ffi::{self, parser_t, Repin}; +use crate::wchar_ffi::{wstr, W0String, WCharToFFI}; +use crate::wutil::format::printf::sprintf; + +#[widestrs] +pub fn emit( + parser: &mut parser_t, + streams: &mut io_streams_t, + argv: &mut [&wstr], +) -> Option { + let cmd = argv[0]; + + let opts = match HelpOnlyCmdOpts::parse(argv, parser, streams) { + Ok(opts) => opts, + Err(err @ Some(_)) if err != STATUS_CMD_OK => return err, + Err(err) => panic!("Illogical exit code from parse_options(): {err:?}"), + }; + + if opts.print_help { + builtin_print_help(parser, streams, cmd); + return STATUS_CMD_OK; + } + + let Some(event_name) = argv.get(opts.optind) else { + streams.err.append(&sprintf!("%ls: expected event name\n"L, cmd)); + return STATUS_INVALID_ARGS; + }; + + let event_args: Vec = argv[opts.optind + 1..] 
+ .iter() + .map(|s| W0String::from_ustr(s).unwrap()) + .collect(); + let event_arg_ptrs: Vec = event_args + .iter() + .map(|s| ffi::wcharz_t { str_: s.as_ptr() }) + .collect(); + + ffi::event_fire_generic( + parser.pin(), + event_name.to_ffi(), + event_arg_ptrs.as_ptr(), + c_int::try_from(event_arg_ptrs.len()).unwrap().into(), + ); + + STATUS_CMD_OK +} diff --git a/fish-rust/src/builtins/exit.rs b/fish-rust/src/builtins/exit.rs new file mode 100644 index 000000000..b0ffc0f77 --- /dev/null +++ b/fish-rust/src/builtins/exit.rs @@ -0,0 +1,26 @@ +use libc::c_int; + +use super::r#return::parse_return_value; +use super::shared::io_streams_t; +use crate::ffi::{parser_t, Repin}; +use crate::wchar::wstr; + +/// Function for handling the exit builtin. +pub fn exit( + parser: &mut parser_t, + streams: &mut io_streams_t, + args: &mut [&wstr], +) -> Option { + let retval = match parse_return_value(args, parser, streams) { + Ok(v) => v, + Err(e) => return e, + }; + + // Mark that we are exiting in the parser. + // TODO: in concurrent mode this won't successfully exit a pipeline, as there are other parsers + // involved. That is, `exit | sleep 1000` may not exit as hoped. Need to rationalize what + // behavior we want here. + parser.pin().libdata().set_exit_current_script(true); + + return Some(retval); +} diff --git a/fish-rust/src/builtins/mod.rs b/fish-rust/src/builtins/mod.rs new file mode 100644 index 000000000..6634804b7 --- /dev/null +++ b/fish-rust/src/builtins/mod.rs @@ -0,0 +1,7 @@ +pub mod shared; + +pub mod echo; +pub mod emit; +pub mod r#return; +pub mod wait; +mod exit; diff --git a/fish-rust/src/builtins/return.rs b/fish-rust/src/builtins/return.rs new file mode 100644 index 000000000..650c73232 --- /dev/null +++ b/fish-rust/src/builtins/return.rs @@ -0,0 +1,130 @@ +// Implementation of the return builtin. 
+ +use libc::c_int; +use num_traits::abs; + +use super::shared::{ + builtin_missing_argument, builtin_print_error_trailer, builtin_print_help, io_streams_t, + BUILTIN_ERR_NOT_NUMBER, STATUS_CMD_OK, STATUS_INVALID_ARGS, +}; +use crate::builtins::shared::BUILTIN_ERR_TOO_MANY_ARGUMENTS; +use crate::ffi::{parser_t, Repin}; +use crate::wchar::{wstr, L}; +use crate::wgetopt::{wgetopter_t, wopt, woption, woption_argument_t}; +use crate::wutil::fish_wcstoi; +use crate::wutil::wgettext_fmt; + +#[derive(Debug, Clone, Copy, Default)] +struct Options { + print_help: bool, +} + +fn parse_options( + args: &mut [&wstr], + parser: &mut parser_t, + streams: &mut io_streams_t, +) -> Result<(Options, usize), Option> { + let cmd = args[0]; + + const SHORT_OPTS: &wstr = L!(":h"); + const LONG_OPTS: &[woption] = &[wopt(L!("help"), woption_argument_t::no_argument, 'h')]; + + let mut opts = Options::default(); + + let mut w = wgetopter_t::new(SHORT_OPTS, LONG_OPTS, args); + + while let Some(c) = w.wgetopt_long() { + match c { + 'h' => opts.print_help = true, + ':' => { + builtin_missing_argument(parser, streams, cmd, args[w.woptind - 1], true); + return Err(STATUS_INVALID_ARGS); + } + '?' => { + // We would normally invoke builtin_unknown_option() and return an error. + // But for this command we want to let it try and parse the value as a negative + // return value. + return Ok((opts, w.woptind - 1)); + } + _ => { + panic!("unexpected retval from wgetopt_long"); + } + } + } + + Ok((opts, w.woptind)) +} + +/// Function for handling the return builtin. +pub fn r#return( + parser: &mut parser_t, + streams: &mut io_streams_t, + args: &mut [&wstr], +) -> Option { + let mut retval = match parse_return_value(args, parser, streams) { + Ok(v) => v, + Err(e) => return e, + }; + + let has_function_block = parser.ffi_has_funtion_block(); + + // *nix does not support negative return values, but our `return` builtin happily accepts being + // called with negative literals (e.g. `return -1`). 
+ // Map negative values to (256 - their absolute value). This prevents `return -1` from + // evaluating to a `$status` of 0 and keeps us from running into undefined behavior by trying to + // left shift a negative value in W_EXITCODE(). + if retval < 0 { + retval = 256 - (abs(retval) % 256); + } + + // If we're not in a function, exit the current script (but not an interactive shell). + if !has_function_block { + if !parser.is_interactive() { + parser.pin().libdata().set_exit_current_script(true); + } + return Some(retval); + } + + // Mark a return in the libdata. + parser.pin().libdata().set_returning(true); + + return Some(retval); +} + +pub fn parse_return_value( + args: &mut [&wstr], + parser: &mut parser_t, + streams: &mut io_streams_t, +) -> Result> { + let cmd = args[0]; + let (opts, optind) = match parse_options(args, parser, streams) { + Ok((opts, optind)) => (opts, optind), + Err(err @ Some(_)) if err != STATUS_CMD_OK => return Err(err), + Err(err) => panic!("Illogical exit code from parse_options(): {err:?}"), + }; + if opts.print_help { + builtin_print_help(parser, streams, cmd); + return Err(STATUS_CMD_OK); + } + if optind + 1 < args.len() { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_TOO_MANY_ARGUMENTS, cmd)); + builtin_print_error_trailer(parser, streams, cmd); + return Err(STATUS_INVALID_ARGS); + } + if optind == args.len() { + Ok(parser.get_last_status().into()) + } else { + match fish_wcstoi(args[optind].chars()) { + Ok(i) => Ok(i), + Err(_e) => { + streams + .err + .append(wgettext_fmt!(BUILTIN_ERR_NOT_NUMBER, cmd, args[1])); + builtin_print_error_trailer(parser, streams, cmd); + return Err(STATUS_INVALID_ARGS); + } + } + } +} diff --git a/fish-rust/src/builtins/shared.rs b/fish-rust/src/builtins/shared.rs new file mode 100644 index 000000000..c9d5152aa --- /dev/null +++ b/fish-rust/src/builtins/shared.rs @@ -0,0 +1,222 @@ +use crate::builtins::wait; +use crate::ffi::{self, parser_t, wcharz_t, Repin, RustBuiltin}; +use 
crate::wchar::{self, wstr, L}; +use crate::wchar_ffi::{c_str, empty_wstring}; +use crate::wgetopt::{wgetopter_t, wopt, woption, woption_argument_t}; +use libc::c_int; +use std::pin::Pin; + +#[cxx::bridge] +mod builtins_ffi { + extern "C++" { + include!("wutil.h"); + include!("parser.h"); + include!("builtin.h"); + + type wcharz_t = crate::ffi::wcharz_t; + type parser_t = crate::ffi::parser_t; + type io_streams_t = crate::ffi::io_streams_t; + type RustBuiltin = crate::ffi::RustBuiltin; + } + extern "Rust" { + fn rust_run_builtin( + parser: Pin<&mut parser_t>, + streams: Pin<&mut io_streams_t>, + cpp_args: &Vec, + builtin: RustBuiltin, + status_code: &mut i32, + ) -> bool; + } + + impl Vec {} +} + +/// Error message when too many arguments are supplied to a builtin. +pub const BUILTIN_ERR_TOO_MANY_ARGUMENTS: &str = "%ls: too many arguments\n"; + +/// Error message when integer expected +pub const BUILTIN_ERR_NOT_NUMBER: &str = "%ls: %ls: invalid integer\n"; + +/// A handy return value for successful builtins. +pub const STATUS_CMD_OK: Option = Some(0); + +/// A handy return value for invalid args. +pub const STATUS_INVALID_ARGS: Option = Some(2); + +/// A wrapper around output_stream_t. +pub struct output_stream_t(*mut ffi::output_stream_t); + +impl output_stream_t { + /// \return the underlying output_stream_t. + fn ffi(&mut self) -> Pin<&mut ffi::output_stream_t> { + unsafe { (*self.0).pin() } + } + + /// Append a &wtr or WString. + pub fn append>(&mut self, s: Str) -> bool { + self.ffi().append1(c_str!(s)) + } +} + +// Convenience wrappers around C++ io_streams_t. 
+pub struct io_streams_t { + streams: *mut builtins_ffi::io_streams_t, + pub out: output_stream_t, + pub err: output_stream_t, +} + +impl io_streams_t { + pub fn new(mut streams: Pin<&mut builtins_ffi::io_streams_t>) -> io_streams_t { + let out = output_stream_t(streams.as_mut().get_out().unpin()); + let err = output_stream_t(streams.as_mut().get_err().unpin()); + let streams = streams.unpin(); + io_streams_t { streams, out, err } + } + + pub fn ffi_pin(&mut self) -> Pin<&mut builtins_ffi::io_streams_t> { + unsafe { Pin::new_unchecked(&mut *self.streams) } + } + + pub fn ffi_ref(&self) -> &builtins_ffi::io_streams_t { + unsafe { &*self.streams } + } +} + +fn rust_run_builtin( + parser: Pin<&mut parser_t>, + streams: Pin<&mut builtins_ffi::io_streams_t>, + cpp_args: &Vec, + builtin: RustBuiltin, + status_code: &mut i32, +) -> bool { + let mut storage = Vec::::new(); + for arg in cpp_args { + storage.push(arg.into()); + } + let mut args = Vec::new(); + for arg in &storage { + args.push(arg.as_utfstr()); + } + let streams = &mut io_streams_t::new(streams); + + match run_builtin(parser.unpin(), streams, args.as_mut_slice(), builtin) { + None => false, + Some(status) => { + *status_code = status; + true + } + } +} + +pub fn run_builtin( + parser: &mut parser_t, + streams: &mut io_streams_t, + args: &mut [&wstr], + builtin: RustBuiltin, +) -> Option { + match builtin { + RustBuiltin::Echo => super::echo::echo(parser, streams, args), + RustBuiltin::Emit => super::emit::emit(parser, streams, args), + RustBuiltin::Exit => super::exit::exit(parser, streams, args), + RustBuiltin::Return => super::r#return::r#return(parser, streams, args), + RustBuiltin::Wait => wait::wait(parser, streams, args), + } +} + +// Covers of these functions that take care of the pinning, etc. +// These all return STATUS_INVALID_ARGS. 
+pub fn builtin_missing_argument( + parser: &mut parser_t, + streams: &mut io_streams_t, + cmd: &wstr, + opt: &wstr, + print_hints: bool, +) { + ffi::builtin_missing_argument( + parser.pin(), + streams.ffi_pin(), + c_str!(cmd), + c_str!(opt), + print_hints, + ); +} + +pub fn builtin_unknown_option( + parser: &mut parser_t, + streams: &mut io_streams_t, + cmd: &wstr, + opt: &wstr, + print_hints: bool, +) { + ffi::builtin_unknown_option( + parser.pin(), + streams.ffi_pin(), + c_str!(cmd), + c_str!(opt), + print_hints, + ); +} + +pub fn builtin_print_help(parser: &mut parser_t, streams: &io_streams_t, cmd: &wstr) { + ffi::builtin_print_help( + parser.pin(), + streams.ffi_ref(), + c_str!(cmd), + empty_wstring(), + ); +} + +pub fn builtin_print_error_trailer(parser: &mut parser_t, streams: &mut io_streams_t, cmd: &wstr) { + ffi::builtin_print_error_trailer(parser.pin(), streams.err.ffi(), c_str!(cmd)); +} + +pub struct HelpOnlyCmdOpts { + pub print_help: bool, + pub optind: usize, +} + +impl HelpOnlyCmdOpts { + pub fn parse( + args: &mut [&wstr], + parser: &mut parser_t, + streams: &mut io_streams_t, + ) -> Result> { + let cmd = args[0]; + let print_hints = true; + + const shortopts: &wstr = L!("+:h"); + const longopts: &[woption] = &[wopt(L!("help"), woption_argument_t::no_argument, 'h')]; + + let mut print_help = false; + let mut w = wgetopter_t::new(shortopts, longopts, args); + while let Some(c) = w.wgetopt_long() { + match c { + 'h' => { + print_help = true; + } + ':' => { + builtin_missing_argument( + parser, + streams, + cmd, + args[w.woptind - 1], + print_hints, + ); + return Err(STATUS_INVALID_ARGS); + } + '?' 
=> { + builtin_unknown_option(parser, streams, cmd, args[w.woptind - 1], print_hints); + return Err(STATUS_INVALID_ARGS); + } + _ => { + panic!("unexpected retval from wgetopter::wgetopt_long()"); + } + } + } + + Ok(HelpOnlyCmdOpts { + print_help, + optind: w.woptind, + }) + } +} diff --git a/fish-rust/src/builtins/wait.rs b/fish-rust/src/builtins/wait.rs new file mode 100644 index 000000000..36d9a8246 --- /dev/null +++ b/fish-rust/src/builtins/wait.rs @@ -0,0 +1,246 @@ +use libc::{c_int, pid_t}; + +use crate::builtins::shared::{ + builtin_missing_argument, builtin_print_help, builtin_unknown_option, io_streams_t, + STATUS_CMD_OK, STATUS_INVALID_ARGS, +}; +use crate::ffi::{job_t, parser_t, proc_wait_any, wait_handle_ref_t, Repin}; +use crate::signal::sigchecker_t; +use crate::wchar::{widestrs, wstr}; +use crate::wgetopt::{wgetopter_t, wopt, woption, woption_argument_t}; +use crate::wutil::{self, fish_wcstoi, wgettext_fmt}; + +/// \return true if we can wait on a job. +fn can_wait_on_job(j: &cxx::SharedPtr) -> bool { + j.is_constructed() && !j.is_foreground() && !j.is_stopped() +} + +/// \return true if a wait handle matches a pid or a process name. +/// For convenience, this returns false if the wait handle is null. +fn wait_handle_matches(query: WaitHandleQuery, wh: &wait_handle_ref_t) -> bool { + if wh.is_null() { + return false; + } + match query { + WaitHandleQuery::Pid(pid) => wh.get_pid().0 == pid, + WaitHandleQuery::ProcName(proc_name) => proc_name == wh.get_base_name(), + } +} + +/// \return true if all chars are numeric. +fn iswnumeric(s: &wstr) -> bool { + s.chars().all(|c| c.is_ascii_digit()) +} + +// Hack to copy wait handles into a vector. 
+/// Snapshot the parser's wait-handle store into a plain Vec so it can be iterated
+/// from Rust. Each element is a wait_handle_ref_t; pushing one presumably just
+/// clones a shared handle — TODO confirm against the C++ wait_handle_store_t.
+fn get_wait_handle_list(parser: &parser_t) -> Vec {
+ let mut handles = Vec::new();
+ let whs = parser.get_wait_handles1();
+ for idx in 0..whs.size() {
+ handles.push(whs.get(idx));
+ }
+ handles
+}
+
+/// What `wait` is searching for: a concrete pid, or a process base name.
+#[derive(Copy, Clone)]
+enum WaitHandleQuery<'a> {
+ Pid(pid_t),
+ ProcName(&'a wstr),
+}
+
+/// Walk the list of jobs, looking for a process with the given pid or proc name.
+/// Append all matching wait handles to \p handles.
+/// \return true if we found a matching job (even if not waitable), false if not.
+fn find_wait_handles(
+ query: WaitHandleQuery<'_>,
+ parser: &parser_t,
+ handles: &mut Vec,
+) -> bool {
+ // Has a job already completed?
+ // TODO: we can avoid traversing this list if searching by pid.
+ let mut matched = false;
+ for wh in get_wait_handle_list(parser) {
+ if wait_handle_matches(query, &wh) {
+ handles.push(wh);
+ matched = true;
+ }
+ }
+
+ // Is there a running job match?
+ for j in parser.get_jobs() {
+ // We want to set 'matched' to true if we could have matched, even if the job was stopped.
+ let provide_handle = can_wait_on_job(j);
+ for proc in j.get_procs() {
+ // NOTE(review): make_wait_handle() looks like it may lazily create the
+ // handle on the C++ side — confirm it is cheap to call per process.
+ let wh = proc.pin_mut().make_wait_handle(j.get_internal_job_id());
+ if wait_handle_matches(query, &wh) {
+ matched = true;
+ if provide_handle {
+ handles.push(wh);
+ }
+ }
+ }
+ }
+ matched
+}
+
+/// Collect wait handles for every reaped job plus every waitable running job.
+/// NOTE(review): the first loop duplicates get_wait_handle_list(); consider reusing it.
+fn get_all_wait_handles(parser: &parser_t) -> Vec {
+ let mut result = Vec::new();
+ // Get wait handles for reaped jobs.
+ let wait_handles = parser.get_wait_handles1();
+ for idx in 0..wait_handles.size() {
+ result.push(wait_handles.get(idx));
+ }
+
+ // Get wait handles for running jobs. 
+ for j in parser.get_jobs() { + if !can_wait_on_job(j) { + continue; + } + for proc_ptr in j.get_procs().iter_mut() { + let proc = proc_ptr.pin_mut(); + let wh = proc.make_wait_handle(j.get_internal_job_id()); + if !wh.is_null() { + result.push(wh); + } + } + } + result +} + +fn is_completed(wh: &wait_handle_ref_t) -> bool { + wh.is_completed() +} + +/// Wait for the given wait handles to be marked as completed. +/// If \p any_flag is set, wait for the first one; otherwise wait for all. +/// \return a status code. +fn wait_for_completion( + parser: &mut parser_t, + whs: &[wait_handle_ref_t], + any_flag: bool, +) -> Option { + if whs.is_empty() { + return Some(0); + } + + let mut sigint = sigchecker_t::new_sighupint(); + loop { + let finished = if any_flag { + whs.iter().any(is_completed) + } else { + whs.iter().all(is_completed) + }; + + if finished { + // Remove completed wait handles (at most 1 if any_flag is set). + for wh in whs { + if is_completed(wh) { + parser.pin().get_wait_handles().remove(wh); + if any_flag { + break; + } + } + } + return Some(0); + } + if sigint.check() { + return Some(128 + libc::SIGINT); + } + proc_wait_any(parser.pin()); + } +} + +#[widestrs] +pub fn wait( + parser: &mut parser_t, + streams: &mut io_streams_t, + argv: &mut [&wstr], +) -> Option { + let cmd = argv[0]; + let argc = argv.len(); + let mut any_flag = false; // flag for -n option + let mut print_help = false; + let print_hints = false; + + const shortopts: &wstr = ":nh"L; + const longopts: &[woption] = &[ + wopt("any"L, woption_argument_t::no_argument, 'n'), + wopt("help"L, woption_argument_t::no_argument, 'h'), + ]; + + let mut w = wgetopter_t::new(shortopts, longopts, argv); + while let Some(c) = w.wgetopt_long() { + match c { + 'n' => { + any_flag = true; + } + 'h' => { + print_help = true; + } + ':' => { + builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1], print_hints); + return STATUS_INVALID_ARGS; + } + '?' 
=> { + builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1], print_hints); + return STATUS_INVALID_ARGS; + } + _ => { + panic!("unexpected retval from wgeopter.next()"); + } + } + } + + if print_help { + builtin_print_help(parser, streams, cmd); + return STATUS_CMD_OK; + } + + if w.woptind == argc { + // No jobs specified. + // Note this may succeed with an empty wait list. + return wait_for_completion(parser, &get_all_wait_handles(parser), any_flag); + } + + // Get the list of wait handles for our waiting. + let mut wait_handles: Vec = Vec::new(); + for i in w.woptind..argc { + if iswnumeric(argv[i]) { + // argument is pid + let mpid: Result = fish_wcstoi(argv[i].chars()); + if mpid.is_err() || mpid.unwrap() <= 0 { + streams.err.append(wgettext_fmt!( + "%ls: '%ls' is not a valid process id\n", + cmd, + argv[i], + )); + continue; + } + let pid = mpid.unwrap() as pid_t; + if !find_wait_handles(WaitHandleQuery::Pid(pid), parser, &mut wait_handles) { + streams.err.append(wgettext_fmt!( + "%ls: Could not find a job with process id '%d'\n", + cmd, + pid, + )); + } + } else { + // argument is process name + if !find_wait_handles( + WaitHandleQuery::ProcName(argv[i]), + parser, + &mut wait_handles, + ) { + streams.err.append(wgettext_fmt!( + "%ls: Could not find child processes with the name '%ls'\n", + cmd, + argv[i], + )); + } + } + } + if wait_handles.is_empty() { + return STATUS_INVALID_ARGS; + } + return wait_for_completion(parser, &wait_handles, any_flag); +} diff --git a/fish-rust/src/common.rs b/fish-rust/src/common.rs new file mode 100644 index 000000000..3042ad9cb --- /dev/null +++ b/fish-rust/src/common.rs @@ -0,0 +1,94 @@ +use crate::{ + ffi, + wchar_ffi::{wstr, WCharFromFFI, WString}, +}; +use std::{ffi::c_uint, mem}; + +/// A scoped manager to save the current value of some variable, and optionally set it to a new +/// value. When dropped, it restores the variable to its old value. 
+/// +/// This can be handy when there are multiple code paths to exit a block. +pub struct ScopedPush<'a, T> { + var: &'a mut T, + saved_value: Option, +} + +impl<'a, T> ScopedPush<'a, T> { + pub fn new(var: &'a mut T, new_value: T) -> Self { + let saved_value = mem::replace(var, new_value); + + Self { + var, + saved_value: Some(saved_value), + } + } + + pub fn restore(&mut self) { + if let Some(saved_value) = self.saved_value.take() { + *self.var = saved_value; + } + } +} + +impl<'a, T> Drop for ScopedPush<'a, T> { + fn drop(&mut self) { + self.restore() + } +} + +pub enum EscapeStringStyle { + Script(EscapeFlags), + Url, + Var, + Regex, +} + +/// Flags for the [`escape_string()`] function. These are only applicable when the escape style is +/// [`EscapeStringStyle::Script`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub struct EscapeFlags { + /// Do not escape special fish syntax characters like the semicolon. Only escape non-printable + /// characters and backslashes. + pub no_printables: bool, + /// Do not try to use 'simplified' quoted escapes, and do not use empty quotes as the empty + /// string. + pub no_quoted: bool, + /// Do not escape tildes. + pub no_tilde: bool, + /// Replace non-printable control characters with Unicode symbols. + pub symbolic: bool, +} + +/// Replace special characters with backslash escape sequences. Newline is replaced with `\n`, etc. 
+pub fn escape_string(s: &wstr, style: EscapeStringStyle) -> WString {
+ let mut flags_int = 0;
+
+ let style = match style {
+ EscapeStringStyle::Script(flags) => {
+ // NOTE(review): these bit values are passed straight through to the C++
+ // ffi::escape_string(), so they must mirror the C++ ESCAPE_* flag
+ // constants — confirm they stay in sync.
+ const ESCAPE_NO_PRINTABLES: c_uint = 1 << 0;
+ const ESCAPE_NO_QUOTED: c_uint = 1 << 1;
+ const ESCAPE_NO_TILDE: c_uint = 1 << 2;
+ const ESCAPE_SYMBOLIC: c_uint = 1 << 3;
+
+ if flags.no_printables {
+ flags_int |= ESCAPE_NO_PRINTABLES;
+ }
+ if flags.no_quoted {
+ flags_int |= ESCAPE_NO_QUOTED;
+ }
+ if flags.no_tilde {
+ flags_int |= ESCAPE_NO_TILDE;
+ }
+ if flags.symbolic {
+ flags_int |= ESCAPE_SYMBOLIC;
+ }
+
+ ffi::escape_string_style_t::STRING_STYLE_SCRIPT
+ }
+ EscapeStringStyle::Url => ffi::escape_string_style_t::STRING_STYLE_URL,
+ EscapeStringStyle::Var => ffi::escape_string_style_t::STRING_STYLE_VAR,
+ EscapeStringStyle::Regex => ffi::escape_string_style_t::STRING_STYLE_REGEX,
+ };
+
+ // Delegate to the C++ implementation; from_ffi() copies the result back into
+ // a Rust-owned WString.
+ ffi::escape_string(s.as_ptr(), flags_int.into(), style).from_ffi()
+}
 diff --git a/fish-rust/src/fd_readable_set.rs b/fish-rust/src/fd_readable_set.rs new file mode 100644 index 000000000..4bea1248d --- /dev/null +++ b/fish-rust/src/fd_readable_set.rs @@ -0,0 +1,239 @@ +use libc::c_int;
+use std::os::unix::io::RawFd;
+
+// cxx bridge exposing fd_readable_set_t and its free-function helpers to C++.
+#[cxx::bridge]
+mod fd_readable_set_ffi {
+ extern "Rust" {
+ type fd_readable_set_t;
+ fn new_fd_readable_set() -> Box;
+ fn clear(&mut self);
+ fn add(&mut self, fd: i32);
+ fn test(&self, fd: i32) -> bool;
+ fn check_readable(&mut self, timeout_usec: u64) -> i32;
+ fn is_fd_readable(fd: i32, timeout_usec: u64) -> bool;
+ fn poll_fd_readable(fd: i32) -> bool;
+ }
+}
+
+/// Create a new fd_readable_set_t.
+pub fn new_fd_readable_set() -> Box {
+ Box::new(fd_readable_set_t::new())
+}
+
+/// \return true if the fd is or becomes readable within the given timeout.
+/// This returns false if the waiting is interrupted by a signal.
+pub fn is_fd_readable(fd: i32, timeout_usec: u64) -> bool {
+ fd_readable_set_t::is_fd_readable(fd, timeout_usec)
+}
+
+/// \return whether an fd is readable. 
+pub fn poll_fd_readable(fd: i32) -> bool { + fd_readable_set_t::poll_fd_readable(fd) +} + +/// A modest wrapper around select() or poll(). +/// This allows accumulating a set of fds and then seeing if they are readable. +/// This only handles readability. +/// Apple's `man poll`: "The poll() system call currently does not support devices." +#[cfg(target_os = "macos")] +pub struct fd_readable_set_t { + // The underlying fdset and nfds value to pass to select(). + fdset_: libc::fd_set, + nfds_: c_int, +} + +const kUsecPerMsec: u64 = 1000; +const kUsecPerSec: u64 = 1000 * kUsecPerMsec; + +#[cfg(target_os = "macos")] +impl fd_readable_set_t { + /// Construct an empty set. + pub fn new() -> fd_readable_set_t { + fd_readable_set_t { + fdset_: unsafe { std::mem::zeroed() }, + nfds_: 0, + } + } + + /// Reset back to an empty set. + pub fn clear(&mut self) { + self.nfds_ = 0; + unsafe { + libc::FD_ZERO(&mut self.fdset_); + } + } + + /// Add an fd to the set. The fd is ignored if negative (for convenience). + pub fn add(&mut self, fd: RawFd) { + if fd >= (libc::FD_SETSIZE as RawFd) { + //FLOGF(error, "fd %d too large for select()", fd); + return; + } + if fd >= 0 { + unsafe { libc::FD_SET(fd, &mut self.fdset_) }; + self.nfds_ = std::cmp::max(self.nfds_, fd + 1); + } + } + + /// \return true if the given fd is marked as set, in our set. \returns false if negative. + pub fn test(&self, fd: RawFd) -> bool { + fd >= 0 && unsafe { libc::FD_ISSET(fd, &self.fdset_) } + } + + /// Call select() or poll(), according to FISH_READABLE_SET_USE_POLL. Note this destructively + /// modifies the set. \return the result of select() or poll(). 
+ pub fn check_readable(&mut self, timeout_usec: u64) -> c_int { + let null = std::ptr::null_mut(); + if timeout_usec == Self::kNoTimeout { + unsafe { + return libc::select( + self.nfds_, + &mut self.fdset_, + null, + null, + std::ptr::null_mut(), + ); + } + } else { + let mut tvs = libc::timeval { + tv_sec: (timeout_usec / kUsecPerSec) as libc::time_t, + tv_usec: (timeout_usec % kUsecPerSec) as libc::suseconds_t, + }; + unsafe { + return libc::select(self.nfds_, &mut self.fdset_, null, null, &mut tvs); + } + } + } + + /// Check if a single fd is readable, with a given timeout. + /// \return true if readable, false if not. + pub fn is_fd_readable(fd: RawFd, timeout_usec: u64) -> bool { + if fd < 0 { + return false; + } + let mut s = Self::new(); + s.add(fd); + let res = s.check_readable(timeout_usec); + return res > 0 && s.test(fd); + } + + /// Check if a single fd is readable, without blocking. + /// \return true if readable, false if not. + pub fn poll_fd_readable(fd: RawFd) -> bool { + return Self::is_fd_readable(fd, 0); + } + + /// A special timeout value which may be passed to indicate no timeout. + pub const kNoTimeout: u64 = u64::MAX; +} + +#[cfg(not(target_os = "macos"))] +pub struct fd_readable_set_t { + pollfds_: Vec, +} + +#[cfg(not(target_os = "macos"))] +impl fd_readable_set_t { + /// Construct an empty set. + pub fn new() -> fd_readable_set_t { + fd_readable_set_t { + pollfds_: Vec::new(), + } + } + + /// Reset back to an empty set. + pub fn clear(&mut self) { + self.pollfds_.clear(); + } + + #[inline] + fn pollfd_get_fd(pollfd: &libc::pollfd) -> RawFd { + pollfd.fd + } + + /// Add an fd to the set. The fd is ignored if negative (for convenience). 
+ pub fn add(&mut self, fd: RawFd) { + if fd >= 0 { + if let Err(pos) = self.pollfds_.binary_search_by_key(&fd, Self::pollfd_get_fd) { + self.pollfds_.insert( + pos, + libc::pollfd { + fd, + events: libc::POLLIN, + revents: 0, + }, + ); + } + } + } + + /// \return true if the given fd is marked as set, in our set. \returns false if negative. + pub fn test(&self, fd: RawFd) -> bool { + // If a pipe is widowed with no data, Linux sets POLLHUP but not POLLIN, so test for both. + if let Ok(pos) = self.pollfds_.binary_search_by_key(&fd, Self::pollfd_get_fd) { + let pollfd = &self.pollfds_[pos]; + debug_assert_eq!(pollfd.fd, fd); + return pollfd.revents & (libc::POLLIN | libc::POLLHUP) != 0; + } + return false; + } + + // Convert from a usec to a poll-friendly msec. + fn usec_to_poll_msec(timeout_usec: u64) -> c_int { + let mut timeout_msec: u64 = timeout_usec / kUsecPerMsec; + // Round to nearest, down for halfway. + if (timeout_usec % kUsecPerMsec) > kUsecPerMsec / 2 { + timeout_msec += 1; + } + if timeout_usec == fd_readable_set_t::kNoTimeout || timeout_msec > c_int::MAX as u64 { + // Negative values mean wait forever in poll-speak. + return -1; + } + return timeout_msec as c_int; + } + + fn do_poll(fds: &mut [libc::pollfd], timeout_usec: u64) -> c_int { + let count = fds.len(); + assert!(count <= libc::nfds_t::MAX as usize, "count too big"); + return unsafe { + libc::poll( + fds.as_mut_ptr(), + count as libc::nfds_t, + Self::usec_to_poll_msec(timeout_usec), + ) + }; + } + + /// Call select() or poll(), according to FISH_READABLE_SET_USE_POLL. Note this destructively + /// modifies the set. \return the result of select() or poll(). + pub fn check_readable(&mut self, timeout_usec: u64) -> c_int { + if self.pollfds_.is_empty() { + return 0; + } + return Self::do_poll(&mut self.pollfds_, timeout_usec); + } + + /// Check if a single fd is readable, with a given timeout. + /// \return true if readable, false if not. 
+ pub fn is_fd_readable(fd: RawFd, timeout_usec: u64) -> bool { + if fd < 0 { + return false; + } + let mut pfd = libc::pollfd { + fd, + events: libc::POLLIN, + revents: 0, + }; + let ret = Self::do_poll(std::slice::from_mut(&mut pfd), timeout_usec); + return ret > 0 && (pfd.revents & libc::POLLIN) != 0; + } + + /// Check if a single fd is readable, without blocking. + /// \return true if readable, false if not. + pub fn poll_fd_readable(fd: RawFd) -> bool { + return Self::is_fd_readable(fd, 0); + } + + /// A special timeout value which may be passed to indicate no timeout. + pub const kNoTimeout: u64 = u64::MAX; +} diff --git a/fish-rust/src/fds.rs b/fish-rust/src/fds.rs new file mode 100644 index 000000000..a7092c644 --- /dev/null +++ b/fish-rust/src/fds.rs @@ -0,0 +1,88 @@ +use crate::ffi; +use nix::unistd; +use std::os::unix::io::RawFd; + +/// A helper type for managing and automatically closing a file descriptor +pub struct autoclose_fd_t { + fd_: RawFd, +} + +impl autoclose_fd_t { + // Closes the fd if not already closed. + pub fn close(&mut self) { + if self.fd_ != -1 { + _ = unistd::close(self.fd_); + self.fd_ = -1; + } + } + + // Returns the fd. + pub fn fd(&self) -> RawFd { + self.fd_ + } + + // Returns the fd, transferring ownership to the caller. + pub fn acquire(&mut self) -> RawFd { + let temp = self.fd_; + self.fd_ = -1; + temp + } + + // Resets to a new fd, taking ownership. + pub fn reset(&mut self, fd: RawFd) { + if fd == self.fd_ { + return; + } + self.close(); + self.fd_ = fd; + } + + // \return if this has a valid fd. + pub fn valid(&self) -> bool { + self.fd_ >= 0 + } + + // Construct, taking ownership of an fd. + pub fn new(fd: RawFd) -> autoclose_fd_t { + autoclose_fd_t { fd_: fd } + } +} + +impl Default for autoclose_fd_t { + fn default() -> autoclose_fd_t { + autoclose_fd_t { fd_: -1 } + } +} + +impl Drop for autoclose_fd_t { + fn drop(&mut self) { + self.close() + } +} + +/// Helper type returned from make_autoclose_pipes. 
+#[derive(Default)] +pub struct autoclose_pipes_t { + /// Read end of the pipe. + pub read: autoclose_fd_t, + + /// Write end of the pipe. + pub write: autoclose_fd_t, +} + +/// Construct a pair of connected pipes, set to close-on-exec. +/// \return None on fd exhaustion. +pub fn make_autoclose_pipes() -> Option { + let pipes = ffi::make_pipes_ffi(); + + let readp = autoclose_fd_t::new(pipes.read); + let writep = autoclose_fd_t::new(pipes.write); + if !readp.valid() || !writep.valid() { + None + } else { + Some(autoclose_pipes_t { + read: readp, + write: writep, + }) + } +} diff --git a/fish-rust/src/ffi.rs b/fish-rust/src/ffi.rs new file mode 100644 index 000000000..b39db82fc --- /dev/null +++ b/fish-rust/src/ffi.rs @@ -0,0 +1,137 @@ +use crate::wchar; +#[rustfmt::skip] +use ::std::pin::Pin; +#[rustfmt::skip] +use ::std::slice; +use autocxx::prelude::*; +use cxx::SharedPtr; + +// autocxx has been hacked up to know about this. +pub type wchar_t = u32; + +include_cpp! { + #include "fds.h" + #include "wutil.h" + #include "flog.h" + #include "io.h" + #include "parse_util.h" + #include "wildcard.h" + #include "tokenizer.h" + #include "parser.h" + #include "proc.h" + #include "common.h" + #include "builtin.h" + #include "fallback.h" + #include "event.h" + + safety!(unsafe_ffi) + + generate_pod!("wcharz_t") + generate!("make_fd_nonblocking") + generate!("wperror") + + generate_pod!("pipes_ffi_t") + generate!("make_pipes_ffi") + + generate!("valid_var_name_char") + + generate!("get_flog_file_fd") + + generate!("parse_util_unescape_wildcards") + + generate!("fish_wcwidth") + generate!("fish_wcswidth") + + generate!("wildcard_match") + generate!("wgettext_ptr") + + generate!("parser_t") + generate!("job_t") + generate!("process_t") + generate!("library_data_t") + + generate!("proc_wait_any") + + generate!("output_stream_t") + generate!("io_streams_t") + + generate_pod!("RustFFIJobList") + generate_pod!("RustFFIProcList") + generate_pod!("RustBuiltin") + + 
generate!("builtin_missing_argument") + generate!("builtin_unknown_option") + generate!("builtin_print_help") + generate!("builtin_print_error_trailer") + + generate!("wait_handle_t") + generate!("wait_handle_store_t") + + generate!("event_fire_generic") + + generate!("escape_string") + generate!("sig2wcs") + generate!("wcs2sig") + generate!("signal_get_desc") +} + +impl parser_t { + pub fn get_jobs(&self) -> &[SharedPtr] { + let ffi_jobs = self.ffi_jobs(); + unsafe { slice::from_raw_parts(ffi_jobs.jobs, ffi_jobs.count) } + } +} + +impl job_t { + #[allow(clippy::mut_from_ref)] + pub fn get_procs(&self) -> &mut [UniquePtr] { + let ffi_procs = self.ffi_processes(); + unsafe { slice::from_raw_parts_mut(ffi_procs.procs, ffi_procs.count) } + } +} + +/// Allow wcharz_t to be "into" wstr. +impl From for &wchar::wstr { + fn from(w: wcharz_t) -> Self { + let len = w.length(); + let v = unsafe { slice::from_raw_parts(w.str_ as *const u32, len) }; + wchar::wstr::from_slice(v).expect("Invalid UTF-32") + } +} + +/// Allow wcharz_t to be "into" WString. +impl From for wchar::WString { + fn from(w: wcharz_t) -> Self { + let len = w.length(); + let v = unsafe { slice::from_raw_parts(w.str_ as *const u32, len).to_vec() }; + Self::from_vec(v).expect("Invalid UTF-32") + } +} + +/// A bogus trait for turning &mut Foo into Pin<&mut Foo>. +/// autocxx enforces that non-const methods must be called through Pin, +/// but this means we can't pass around mutable references to types like parser_t. +/// We also don't want to assert that parser_t is Unpin. +/// So we just allow constructing a pin from a mutable reference; none of the C++ code. +/// It's worth considering disabling this in cxx; for now we use this trait. +/// Eventually parser_t and io_streams_t will not require Pin so we just unsafe-it away. 
+pub trait Repin { + fn pin(&mut self) -> Pin<&mut Self> { + unsafe { Pin::new_unchecked(self) } + } + + fn unpin(self: Pin<&mut Self>) -> &mut Self { + unsafe { self.get_unchecked_mut() } + } +} + +// Implement Repin for our types. +impl Repin for parser_t {} +impl Repin for job_t {} +impl Repin for process_t {} +impl Repin for io_streams_t {} +impl Repin for output_stream_t {} + +pub use autocxx::c_int; +pub use ffi::*; +pub use libc::c_char; diff --git a/fish-rust/src/ffi_init.rs b/fish-rust/src/ffi_init.rs new file mode 100644 index 000000000..95293e8e2 --- /dev/null +++ b/fish-rust/src/ffi_init.rs @@ -0,0 +1,27 @@ +/// Bridged functions concerned with initialization. +use crate::ffi::wcharz_t; + +#[cxx::bridge] +mod ffi2 { + + extern "C++" { + include!("wutil.h"); + type wcharz_t = super::wcharz_t; + } + + extern "Rust" { + fn rust_init(); + fn rust_activate_flog_categories_by_pattern(wc_ptr: wcharz_t); + } +} + +/// Entry point for Rust-specific initialization. +fn rust_init() { + crate::topic_monitor::topic_monitor_init(); + crate::future_feature_flags::future_feature_flags_init(); +} + +/// FFI bridge for activate_flog_categories_by_pattern(). +fn rust_activate_flog_categories_by_pattern(wc_ptr: wcharz_t) { + crate::flog::activate_flog_categories_by_pattern(wc_ptr.into()); +} diff --git a/fish-rust/src/ffi_tests.rs b/fish-rust/src/ffi_tests.rs new file mode 100644 index 000000000..d5427c24e --- /dev/null +++ b/fish-rust/src/ffi_tests.rs @@ -0,0 +1,63 @@ +//! Support for tests which need to cross the FFI. +//! +//! Because the C++ is not compiled by `cargo test` and there is no natural way to +//! do it, use the following facilities for tests which need to use C++ types. +//! This uses the inventory crate to build a custom-test harness +//! as described at +//! See smoke.rs add_test for an example of how to use this. + +#[cfg(all(feature = "fish-ffi-tests", not(test)))] +mod ffi_tests_impl { + /// A test which needs to cross the FFI. 
+ #[derive(Debug)] + pub struct FFITest { + pub name: &'static str, + pub func: fn(), + } + + /// Add a new test. + /// Example usage: + /// ``` + /// add_test!("test_name", || { + /// assert!(1 + 2 == 3); + /// }); + /// ``` + macro_rules! add_test { + ($name:literal, $func:expr) => { + inventory::submit!(crate::ffi_tests::FFITest { + name: $name, + func: $func, + }); + }; + } + pub(crate) use add_test; + + inventory::collect!(crate::ffi_tests::FFITest); + + /// Runs all ffi tests. + pub fn run_ffi_tests() { + for test in inventory::iter:: { + println!("Running ffi test {}", test.name); + (test.func)(); + } + } +} + +#[cfg(not(all(feature = "fish-ffi-tests", not(test))))] +mod ffi_tests_impl { + macro_rules! add_test { + ($name:literal, $func:expr) => {}; + } + pub(crate) use add_test; + pub fn run_ffi_tests() {} +} + +pub(crate) use ffi_tests_impl::*; + +#[allow(clippy::module_inception)] +#[cxx::bridge(namespace = rust)] +mod ffi_tests { + extern "Rust" { + fn run_ffi_tests(); + } +} diff --git a/fish-rust/src/flog.rs b/fish-rust/src/flog.rs new file mode 100644 index 000000000..54550f429 --- /dev/null +++ b/fish-rust/src/flog.rs @@ -0,0 +1,198 @@ +use crate::ffi::{get_flog_file_fd, parse_util_unescape_wildcards, wildcard_match}; +use crate::wchar::{widestrs, wstr, WString}; +use crate::wchar_ffi::WCharToFFI; +use std::io::Write; +use std::os::unix::io::{FromRawFd, IntoRawFd, RawFd}; +use std::sync::atomic::Ordering; + +#[rustfmt::skip::macros(category)] +#[widestrs] +pub mod categories { + use super::wstr; + use std::sync::atomic::AtomicBool; + + pub struct category_t { + pub name: &'static wstr, + pub description: &'static wstr, + pub enabled: AtomicBool, + } + + /// Macro to declare a static variable identified by $var, + /// with the given name and description, and optionally enabled by default. + macro_rules! 
declare_category { + ( + ($var:ident, $name:expr, $description:expr, $enabled:expr) + ) => { + pub static $var: category_t = category_t { + name: $name, + description: $description, + enabled: AtomicBool::new($enabled), + }; + }; + ( + ($var:ident, $name:expr, $description:expr) + ) => { + declare_category!(($var, $name, $description, false)); + }; + } + + /// Macro to extract the variable name for a category. + macro_rules! category_name { + (($var:ident, $name:expr, $description:expr, $enabled:expr)) => { + $var + }; + (($var:ident, $name:expr, $description:expr)) => { + $var + }; + } + + macro_rules! categories { + ( + // A repetition of categories, separated by semicolons. + $($cats:tt);* + + // Allow trailing semicolon. + $(;)? + ) => { + // Declare each category. + $( + declare_category!($cats); + )* + + // Define a function which gives you a Vector of all categories. + pub fn all_categories() -> Vec<&'static category_t> { + vec![ + $( + & category_name!($cats), + )* + ] + } + }; + } + + categories!( + (error, "error"L, "Serious unexpected errors (on by default)"L, true); + + (debug, "debug"L, "Debugging aid (on by default)"L, true); + + (warning, "warning"L, "Warnings (on by default)"L, true); + + (warning_path, "warning-path"L, "Warnings about unusable paths for config/history (on by default)"L, true); + + (config, "config"L, "Finding and reading configuration"L); + + (event, "event"L, "Firing events"L); + + (exec, "exec"L, "Errors reported by exec (on by default)"L, true); + + (exec_job_status, "exec-job-status"L, "Jobs changing status"L); + + (exec_job_exec, "exec-job-exec"L, "Jobs being executed"L); + + (exec_fork, "exec-fork"L, "Calls to fork()"L); + + (output_invalid, "output-invalid"L, "Trying to print invalid output"L); + (ast_construction, "ast-construction"L, "Parsing fish AST"L); + + (proc_job_run, "proc-job-run"L, "Jobs getting started or continued"L); + + (proc_termowner, "proc-termowner"L, "Terminal ownership events"L); + + (proc_internal_proc, 
"proc-internal-proc"L, "Internal (non-forked) process events"L); + + (proc_reap_internal, "proc-reap-internal"L, "Reaping internal (non-forked) processes"L); + + (proc_reap_external, "proc-reap-external"L, "Reaping external (forked) processes"L); + (proc_pgroup, "proc-pgroup"L, "Process groups"L); + + (env_locale, "env-locale"L, "Changes to locale variables"L); + + (env_export, "env-export"L, "Changes to exported variables"L); + + (env_dispatch, "env-dispatch"L, "Reacting to variables"L); + + (uvar_file, "uvar-file"L, "Writing/reading the universal variable store"L); + (uvar_notifier, "uvar-notifier"L, "Notifications about universal variable changes"L); + + (topic_monitor, "topic-monitor"L, "Internal details of the topic monitor"L); + (char_encoding, "char-encoding"L, "Character encoding issues"L); + + (history, "history"L, "Command history events"L); + (history_file, "history-file"L, "Reading/Writing the history file"L); + + (profile_history, "profile-history"L, "History performance measurements"L); + + (iothread, "iothread"L, "Background IO thread events"L); + (fd_monitor, "fd-monitor"L, "FD monitor events"L); + + (term_support, "term-support"L, "Terminal feature detection"L); + + (reader, "reader"L, "The interactive reader/input system"L); + (reader_render, "reader-render"L, "Rendering the command line"L); + (complete, "complete"L, "The completion system"L); + (path, "path"L, "Searching/using paths"L); + + (screen, "screen"L, "Screen repaints"L); + ); +} + +/// Write to our FLOG file. +pub fn flog_impl(s: &str) { + let fd = get_flog_file_fd().0 as RawFd; + if fd < 0 { + return; + } + let mut file = unsafe { std::fs::File::from_raw_fd(fd) }; + let _ = file.write(s.as_bytes()); + // Ensure the file is not closed. + file.into_raw_fd(); +} + +macro_rules! 
FLOG { + ($category:ident, $($elem:expr),+) => { + if crate::flog::categories::$category.enabled.load(std::sync::atomic::Ordering::Relaxed) { + let mut vs = Vec::new(); + $( + vs.push(format!("{:?}", $elem)); + )+ + // We don't use locking here so we have to append our own newline to avoid multiple writes. + let mut v = vs.join(" "); + v.push('\n'); + crate::flog::flog_impl(&v); + } + }; +} +pub(crate) use FLOG; + +/// For each category, if its name matches the wildcard, set its enabled to the given sense. +fn apply_one_wildcard(wc_esc: &wstr, sense: bool) { + let wc = parse_util_unescape_wildcards(&wc_esc.to_ffi()); + let mut match_found = false; + for cat in categories::all_categories() { + if wildcard_match(&cat.name.to_ffi(), &wc, false) { + cat.enabled.store(sense, Ordering::Relaxed); + match_found = true; + } + } + if !match_found { + eprintln!("Failed to match debug category: {wc_esc}"); + } +} + +/// Set the active flog categories according to the given wildcard \p wc. +pub fn activate_flog_categories_by_pattern(wc_ptr: &wstr) { + let mut wc: WString = wc_ptr.into(); + // Normalize underscores to dashes, allowing the user to be sloppy. + for c in wc.as_char_slice_mut() { + if *c == '_' { + *c = '-'; + } + } + for s in wc.as_char_slice().split(|c| *c == ',') { + if s.starts_with(&['-']) { + apply_one_wildcard(wstr::from_char_slice(&s[1..]), false); + } else { + apply_one_wildcard(wstr::from_char_slice(s), true); + } + } +} diff --git a/fish-rust/src/future_feature_flags.rs b/fish-rust/src/future_feature_flags.rs new file mode 100644 index 000000000..1eeeb8781 --- /dev/null +++ b/fish-rust/src/future_feature_flags.rs @@ -0,0 +1,254 @@ +//! 
Flags to enable upcoming features + +use crate::ffi::wcharz_t; +use crate::wchar::wstr; +use crate::wchar_ffi::WCharToFFI; +use std::array; +use std::cell::UnsafeCell; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; +use widestring_suffix::widestrs; + +#[cxx::bridge] +mod future_feature_flags_ffi { + extern "C++" { + include!("wutil.h"); + type wcharz_t = super::wcharz_t; + } + + /// The list of flags. + #[repr(u8)] + enum FeatureFlag { + /// Whether ^ is supported for stderr redirection. + stderr_nocaret, + + /// Whether ? is supported as a glob. + qmark_noglob, + + /// Whether string replace -r double-unescapes the replacement. + string_replace_backslash, + + /// Whether "&" is not-special if followed by a word character. + ampersand_nobg_in_token, + } + + /// Metadata about feature flags. + struct feature_metadata_t { + flag: FeatureFlag, + name: UniquePtr, + groups: UniquePtr, + description: UniquePtr, + default_value: bool, + read_only: bool, + } + + extern "Rust" { + type Features; + fn test(self: &Features, flag: FeatureFlag) -> bool; + fn set(self: &mut Features, flag: FeatureFlag, value: bool); + fn set_from_string(self: &mut Features, str: wcharz_t); + fn fish_features() -> *const Features; + fn feature_test(flag: FeatureFlag) -> bool; + fn mutable_fish_features() -> *mut Features; + fn feature_metadata() -> [feature_metadata_t; 4]; + } +} + +pub use future_feature_flags_ffi::{feature_metadata_t, FeatureFlag}; + +pub struct Features { + // Values for the flags. + // These are atomic to "fix" a race reported by tsan where tests of feature flags and other + // tests which use them conceptually race. + values: [AtomicBool; metadata.len()], +} + +/// Metadata about feature flags. +struct FeatureMetadata { + /// The flag itself. + flag: FeatureFlag, + + /// User-presentable short name of the feature flag. + name: &'static wstr, + + /// Comma-separated list of feature groups. 
+ groups: &'static wstr, + + /// User-presentable description of the feature flag. + description: &'static wstr, + + /// Default flag value. + default_value: bool, + + /// Whether the value can still be changed or not. + read_only: bool, +} + +impl From<&FeatureMetadata> for feature_metadata_t { + fn from(md: &FeatureMetadata) -> feature_metadata_t { + feature_metadata_t { + flag: md.flag, + name: md.name.to_ffi(), + groups: md.groups.to_ffi(), + description: md.description.to_ffi(), + default_value: md.default_value, + read_only: md.read_only, + } + } +} + +/// The metadata, indexed by flag. +#[widestrs] +const metadata: [FeatureMetadata; 4] = [ + FeatureMetadata { + flag: FeatureFlag::stderr_nocaret, + name: "stderr-nocaret"L, + groups: "3.0"L, + description: "^ no longer redirects stderr (historical, can no longer be changed)"L, + default_value: true, + read_only: true, + }, + FeatureMetadata { + flag: FeatureFlag::qmark_noglob, + name: "qmark-noglob"L, + groups: "3.0"L, + description: "? no longer globs"L, + default_value: false, + read_only: false, + }, + FeatureMetadata { + flag: FeatureFlag::string_replace_backslash, + name: "regex-easyesc"L, + groups: "3.1"L, + description: "string replace -r needs fewer \\'s"L, + default_value: true, + read_only: false, + }, + FeatureMetadata { + flag: FeatureFlag::ampersand_nobg_in_token, + name: "ampersand-nobg-in-token"L, + groups: "3.4"L, + description: "& only backgrounds if followed by a separator"L, + default_value: true, + read_only: false, + }, +]; + +/// The singleton shared feature set. +static mut global_features: *const UnsafeCell = std::ptr::null(); + +pub fn future_feature_flags_init() { + unsafe { + // Leak it for now. + global_features = Box::into_raw(Box::new(UnsafeCell::new(Features::new()))); + } +} + +impl Features { + fn new() -> Self { + Features { + values: array::from_fn(|i| AtomicBool::new(metadata[i].default_value)), + } + } + + /// Return whether a flag is set. 
+ pub fn test(&self, flag: FeatureFlag) -> bool { + self.values[flag.repr as usize].load(Ordering::SeqCst) + } + + /// Set a flag. + pub fn set(&mut self, flag: FeatureFlag, value: bool) { + self.values[flag.repr as usize].store(value, Ordering::SeqCst) + } + + /// Parses a comma-separated feature-flag string, updating ourselves with the values. + /// Feature names or group names may be prefixed with "no-" to disable them. + /// The special group name "all" may be used for those who like to live on the edge. + /// Unknown features are silently ignored. + #[widestrs] + pub fn set_from_string<'a>(&mut self, str: impl Into<&'a wstr>) { + let str: &wstr = str.into(); + let whitespace = "\t\n\0x0B\0x0C\r "L.as_char_slice(); + for entry in str.as_char_slice().split(|c| *c == ',') { + if entry.is_empty() { + continue; + } + + // Trim leading and trailing whitespace + let entry = &entry[entry.iter().take_while(|c| whitespace.contains(c)).count()..]; + let entry = + &entry[..entry.len() - entry.iter().take_while(|c| whitespace.contains(c)).count()]; + + // A "no-" prefix inverts the sense. + let (name, value) = match entry.strip_prefix("no-"L.as_char_slice()) { + Some(suffix) => (suffix, false), + None => (entry, true), + }; + // Look for a feature with this name. If we don't find it, assume it's a group name and set + // all features whose group contain it. Do nothing even if the string is unrecognized; this + // is to allow uniform invocations of fish (e.g. disable a feature that is only present in + // future versions). + // The special name 'all' may be used for those who like to live on the edge. + if let Some(md) = metadata.iter().find(|md| md.name == name) { + // Only change it if it's not read-only. + // Don't complain if it is, this is typically set from a variable. 
+ if !md.read_only { + self.set(md.flag, value); + } + } else { + for md in &metadata { + if md.groups == name || name == "all"L { + if !md.read_only { + self.set(md.flag, value); + } + } + } + } + } + } +} + +/// Return the global set of features for fish. This is const to prevent accidental mutation. +pub fn fish_features() -> *const Features { + unsafe { (*global_features).get() } +} + +/// Perform a feature test on the global set of features. +pub fn feature_test(flag: FeatureFlag) -> bool { + unsafe { &*(*global_features).get() }.test(flag) +} + +/// Return the global set of features for fish, but mutable. In general fish features should be set +/// at startup only. +pub fn mutable_fish_features() -> *mut Features { + unsafe { (*global_features).get() } +} + +// The metadata, indexed by flag. +pub fn feature_metadata() -> [feature_metadata_t; metadata.len()] { + array::from_fn(|i| (&metadata[i]).into()) +} + +#[test] +#[widestrs] +fn test_feature_flags() { + let mut f = Features::new(); + f.set_from_string("stderr-nocaret,nonsense"L); + assert!(f.test(FeatureFlag::stderr_nocaret)); + f.set_from_string("stderr-nocaret,no-stderr-nocaret,nonsense"L); + assert!(f.test(FeatureFlag::stderr_nocaret)); + + // Ensure every metadata is represented once. 
+ let mut counts: [usize; metadata.len()] = [0; metadata.len()]; + for md in &metadata { + counts[md.flag.repr as usize] += 1; + } + for count in counts { + assert_eq!(count, 1); + } + + assert_eq!( + metadata[FeatureFlag::stderr_nocaret.repr as usize].name, + "stderr-nocaret"L + ); +} diff --git a/fish-rust/src/lib.rs b/fish-rust/src/lib.rs new file mode 100644 index 000000000..9a12c3e69 --- /dev/null +++ b/fish-rust/src/lib.rs @@ -0,0 +1,35 @@ +#![allow(non_camel_case_types)] +#![allow(dead_code)] +#![allow(non_upper_case_globals)] +#![allow(clippy::needless_return)] +#![allow(clippy::manual_is_ascii_check)] + +mod common; +mod fd_readable_set; +mod fds; +#[allow(rustdoc::broken_intra_doc_links)] +#[allow(clippy::module_inception)] +#[allow(clippy::new_ret_no_self)] +#[allow(clippy::wrong_self_convention)] +#[allow(clippy::needless_lifetimes)] +mod ffi; +mod ffi_init; +mod ffi_tests; +mod flog; +mod future_feature_flags; +mod nix; +mod parse_constants; +mod redirection; +mod signal; +mod smoke; +mod timer; +mod tokenizer; +mod topic_monitor; +mod util; +mod wchar; +mod wchar_ext; +mod wchar_ffi; +mod wgetopt; +mod wutil; + +mod builtins; diff --git a/fish-rust/src/nix.rs b/fish-rust/src/nix.rs new file mode 100644 index 000000000..e7b0bda8a --- /dev/null +++ b/fish-rust/src/nix.rs @@ -0,0 +1,23 @@ +//! Safe wrappers around various libc functions that we might want to reuse across modules. 
+ +use std::time::Duration; + +pub const fn timeval_to_duration(val: &libc::timeval) -> Duration { + let micros = val.tv_sec as i64 * (1E6 as i64) + val.tv_usec as i64; + Duration::from_micros(micros as u64) +} + +pub trait TimevalExt { + fn as_micros(&self) -> i64; + fn as_duration(&self) -> Duration; +} + +impl TimevalExt for libc::timeval { + fn as_micros(&self) -> i64 { + timeval_to_duration(self).as_micros() as i64 + } + + fn as_duration(&self) -> Duration { + timeval_to_duration(self) + } +} diff --git a/fish-rust/src/parse_constants.rs b/fish-rust/src/parse_constants.rs new file mode 100644 index 000000000..0118c8f03 --- /dev/null +++ b/fish-rust/src/parse_constants.rs @@ -0,0 +1,724 @@ +//! Constants used in the programmatic representation of fish code. + +use crate::ffi::{fish_wcswidth, fish_wcwidth, wcharz_t}; +use crate::tokenizer::variable_assignment_equals_pos; +use crate::wchar::{wstr, WString, L}; +use crate::wchar_ffi::{wcharz, WCharFromFFI, WCharToFFI}; +use crate::wutil::{sprintf, wgettext_fmt}; +use cxx::{CxxWString, UniquePtr}; +use std::ops::{BitAnd, BitOrAssign}; +use widestring_suffix::widestrs; + +type SourceOffset = u32; + +pub const SOURCE_OFFSET_INVALID: SourceOffset = SourceOffset::MAX; +pub const SOURCE_LOCATION_UNKNOWN: usize = usize::MAX; + +pub struct ParseTreeFlags(u8); + +pub const PARSE_FLAG_NONE: ParseTreeFlags = ParseTreeFlags(0); +/// attempt to build a "parse tree" no matter what. this may result in a 'forest' of +/// disconnected trees. this is intended to be used by syntax highlighting. +pub const PARSE_FLAG_CONTINUE_AFTER_ERROR: ParseTreeFlags = ParseTreeFlags(1 << 0); +/// include comment tokens. 
+pub const PARSE_FLAG_INCLUDE_COMMENTS: ParseTreeFlags = ParseTreeFlags(1 << 1); +/// indicate that the tokenizer should accept incomplete tokens */ +pub const PARSE_FLAG_ACCEPT_INCOMPLETE_TOKENS: ParseTreeFlags = ParseTreeFlags(1 << 2); +/// indicate that the parser should not generate the terminate token, allowing an 'unfinished' +/// tree where some nodes may have no productions. +pub const PARSE_FLAG_LEAVE_UNTERMINATED: ParseTreeFlags = ParseTreeFlags(1 << 3); +/// indicate that the parser should generate job_list entries for blank lines. +pub const PARSE_FLAG_SHOW_BLANK_LINES: ParseTreeFlags = ParseTreeFlags(1 << 4); +/// indicate that extra semis should be generated. +pub const PARSE_FLAG_SHOW_EXTRA_SEMIS: ParseTreeFlags = ParseTreeFlags(1 << 5); + +impl BitAnd for ParseTreeFlags { + type Output = bool; + fn bitand(self, rhs: Self) -> Self::Output { + (self.0 & rhs.0) != 0 + } +} +impl BitOrAssign for ParseTreeFlags { + fn bitor_assign(&mut self, rhs: Self) { + self.0 |= rhs.0 + } +} + +#[derive(PartialEq, Eq)] +pub struct ParserTestErrorBits(u8); + +pub const PARSER_TEST_ERROR: ParserTestErrorBits = ParserTestErrorBits(1); +pub const PARSER_TEST_INCOMPLETE: ParserTestErrorBits = ParserTestErrorBits(2); + +impl BitAnd for ParserTestErrorBits { + type Output = bool; + fn bitand(self, rhs: Self) -> Self::Output { + (self.0 & rhs.0) != 0 + } +} +impl BitOrAssign for ParserTestErrorBits { + fn bitor_assign(&mut self, rhs: Self) { + self.0 |= rhs.0 + } +} + +#[cxx::bridge] +mod parse_constants_ffi { + extern "C++" { + include!("wutil.h"); + type wcharz_t = super::wcharz_t; + } + + /// A range of source code. + #[derive(PartialEq, Eq)] + struct SourceRange { + start: u32, + length: u32, + } + + extern "Rust" { + fn end(self: &SourceRange) -> u32; + fn contains_inclusive(self: &SourceRange, loc: u32) -> bool; + } + + /// IMPORTANT: If the following enum table is modified you must also update token_type_description below. 
+ /// TODO above comment can be removed when we drop the FFI and get real enums. + enum ParseTokenType { + invalid = 1, + + // Terminal types. + string, + pipe, + redirection, + background, + andand, + oror, + end, + // Special terminal type that means no more tokens forthcoming. + terminate, + // Very special terminal types that don't appear in the production list. + error, + tokenizer_error, + comment, + } + + #[repr(u8)] + enum ParseKeyword { + // 'none' is not a keyword, it is a sentinel indicating nothing. + none, + + kw_and, + kw_begin, + kw_builtin, + kw_case, + kw_command, + kw_else, + kw_end, + kw_exclam, + kw_exec, + kw_for, + kw_function, + kw_if, + kw_in, + kw_not, + kw_or, + kw_switch, + kw_time, + kw_while, + } + + extern "Rust" { + fn token_type_description(token_type: ParseTokenType) -> wcharz_t; + fn keyword_description(keyword: ParseKeyword) -> wcharz_t; + fn keyword_from_string(s: wcharz_t) -> ParseKeyword; + } + + // Statement decorations like 'command' or 'exec'. + enum StatementDecoration { + none, + command, + builtin, + exec, + } + + // Parse error code list. + enum ParseErrorCode { + none, + + // Matching values from enum parser_error. + syntax, + cmdsubst, + + generic, // unclassified error types + + // Tokenizer errors. 
+ tokenizer_unterminated_quote, + tokenizer_unterminated_subshell, + tokenizer_unterminated_slice, + tokenizer_unterminated_escape, + tokenizer_other, + + unbalancing_end, // end outside of block + unbalancing_else, // else outside of if + unbalancing_case, // case outside of switch + bare_variable_assignment, // a=b without command + andor_in_pipeline, // "and" or "or" after a pipe + } + + struct parse_error_t { + text: UniquePtr, + code: ParseErrorCode, + source_start: usize, + source_length: usize, + } + + extern "Rust" { + type ParseError; + fn code(self: &ParseError) -> ParseErrorCode; + fn source_start(self: &ParseError) -> usize; + fn text(self: &ParseError) -> UniquePtr; + + #[cxx_name = "describe"] + fn describe_ffi( + self: &ParseError, + src: &CxxWString, + is_interactive: bool, + ) -> UniquePtr; + #[cxx_name = "describe_with_prefix"] + fn describe_with_prefix_ffi( + self: &ParseError, + src: &CxxWString, + prefix: &CxxWString, + is_interactive: bool, + skip_caret: bool, + ) -> UniquePtr; + + fn describe_with_prefix( + self: &parse_error_t, + src: &CxxWString, + prefix: &CxxWString, + is_interactive: bool, + skip_caret: bool, + ) -> UniquePtr; + + type ParseErrorList; + fn new_parse_error_list() -> Box; + #[cxx_name = "offset_source_start"] + fn offset_source_start_ffi(self: &mut ParseErrorList, amt: usize); + fn size(self: &ParseErrorList) -> usize; + fn at(self: &ParseErrorList, offset: usize) -> *const ParseError; + fn empty(self: &ParseErrorList) -> bool; + fn push_back(self: &mut ParseErrorList, error: &parse_error_t); + fn append(self: &mut ParseErrorList, other: *mut ParseErrorList); + fn erase(self: &mut ParseErrorList, index: usize); + fn clear(self: &mut ParseErrorList); + } + + extern "Rust" { + #[cxx_name = "token_type_user_presentable_description"] + fn token_type_user_presentable_description_ffi( + type_: ParseTokenType, + keyword: ParseKeyword, + ) -> UniquePtr; + } + + // The location of a pipeline. 
+ enum PipelinePosition { + none, // not part of a pipeline + first, // first command in a pipeline + subsequent, // second or further command in a pipeline + } +} + +pub use parse_constants_ffi::{ + parse_error_t, ParseErrorCode, ParseKeyword, ParseTokenType, SourceRange, +}; + +impl SourceRange { + fn end(&self) -> SourceOffset { + self.start.checked_add(self.length).expect("Overflow") + } + + // \return true if a location is in this range, including one-past-the-end. + fn contains_inclusive(&self, loc: SourceOffset) -> bool { + self.start <= loc && loc - self.start <= self.length + } +} + +impl From for &'static wstr { + #[widestrs] + fn from(token_type: ParseTokenType) -> Self { + match token_type { + ParseTokenType::comment => "ParseTokenType::comment"L, + ParseTokenType::error => "ParseTokenType::error"L, + ParseTokenType::tokenizer_error => "ParseTokenType::tokenizer_error"L, + ParseTokenType::background => "ParseTokenType::background"L, + ParseTokenType::end => "ParseTokenType::end"L, + ParseTokenType::pipe => "ParseTokenType::pipe"L, + ParseTokenType::redirection => "ParseTokenType::redirection"L, + ParseTokenType::string => "ParseTokenType::string"L, + ParseTokenType::andand => "ParseTokenType::andand"L, + ParseTokenType::oror => "ParseTokenType::oror"L, + ParseTokenType::terminate => "ParseTokenType::terminate"L, + ParseTokenType::invalid => "ParseTokenType::invalid"L, + _ => "unknown token type"L, + } + } +} + +fn token_type_description(token_type: ParseTokenType) -> wcharz_t { + let s: &'static wstr = token_type.into(); + wcharz!(s) +} + +impl From for &'static wstr { + #[widestrs] + fn from(keyword: ParseKeyword) -> Self { + match keyword { + ParseKeyword::kw_exclam => "!"L, + ParseKeyword::kw_and => "and"L, + ParseKeyword::kw_begin => "begin"L, + ParseKeyword::kw_builtin => "builtin"L, + ParseKeyword::kw_case => "case"L, + ParseKeyword::kw_command => "command"L, + ParseKeyword::kw_else => "else"L, + ParseKeyword::kw_end => "end"L, + 
ParseKeyword::kw_exec => "exec"L, + ParseKeyword::kw_for => "for"L, + ParseKeyword::kw_function => "function"L, + ParseKeyword::kw_if => "if"L, + ParseKeyword::kw_in => "in"L, + ParseKeyword::kw_not => "not"L, + ParseKeyword::kw_or => "or"L, + ParseKeyword::kw_switch => "switch"L, + ParseKeyword::kw_time => "time"L, + ParseKeyword::kw_while => "while"L, + _ => "unknown_keyword"L, + } + } +} + +fn keyword_description(keyword: ParseKeyword) -> wcharz_t { + let s: &'static wstr = keyword.into(); + wcharz!(s) +} + +impl From<&wstr> for ParseKeyword { + fn from(s: &wstr) -> Self { + let s: Vec = s.encode_utf8().collect(); + match unsafe { std::str::from_utf8_unchecked(&s) } { + "!" => ParseKeyword::kw_exclam, + "and" => ParseKeyword::kw_and, + "begin" => ParseKeyword::kw_begin, + "builtin" => ParseKeyword::kw_builtin, + "case" => ParseKeyword::kw_case, + "command" => ParseKeyword::kw_command, + "else" => ParseKeyword::kw_else, + "end" => ParseKeyword::kw_end, + "exec" => ParseKeyword::kw_exec, + "for" => ParseKeyword::kw_for, + "function" => ParseKeyword::kw_function, + "if" => ParseKeyword::kw_if, + "in" => ParseKeyword::kw_in, + "not" => ParseKeyword::kw_not, + "or" => ParseKeyword::kw_or, + "switch" => ParseKeyword::kw_switch, + "time" => ParseKeyword::kw_time, + "while" => ParseKeyword::kw_while, + _ => ParseKeyword::none, + } + } +} + +fn keyword_from_string<'a>(s: impl Into<&'a wstr>) -> ParseKeyword { + let s: &wstr = s.into(); + ParseKeyword::from(s) +} + +#[derive(Clone)] +struct ParseError { + /// Text of the error. + text: WString, + /// Code for the error. + code: ParseErrorCode, + /// Offset and length of the token in the source code that triggered this error. 
+ source_start: usize, + source_length: usize, +} + +impl Default for ParseError { + fn default() -> ParseError { + ParseError { + text: L!("").to_owned(), + code: ParseErrorCode::none, + source_start: 0, + source_length: 0, + } + } +} + +impl ParseError { + /// Return a string describing the error, suitable for presentation to the user. If + /// is_interactive is true, the offending line with a caret is printed as well. + pub fn describe(self: &ParseError, src: &wstr, is_interactive: bool) -> WString { + self.describe_with_prefix(src, L!(""), is_interactive, false) + } + + /// Return a string describing the error, suitable for presentation to the user, with the given + /// prefix. If skip_caret is false, the offending line with a caret is printed as well. + pub fn describe_with_prefix( + self: &ParseError, + src: &wstr, + prefix: &wstr, + is_interactive: bool, + skip_caret: bool, + ) -> WString { + let mut result = prefix.to_owned(); + let context = wstr::from_char_slice( + &src.as_char_slice()[self.source_start..self.source_start + self.source_length], + ); + // Some errors don't have their message passed in, so we construct them here. + // This affects e.g. 
`eval "a=(foo)"` + match self.code { + ParseErrorCode::andor_in_pipeline => { + result += wstr::from_char_slice( + wgettext_fmt!(INVALID_PIPELINE_CMD_ERR_MSG, context).as_char_slice(), + ); + } + ParseErrorCode::bare_variable_assignment => { + let assignment_src = context; + #[allow(clippy::explicit_auto_deref)] + let equals_pos = variable_assignment_equals_pos(assignment_src).unwrap(); + let variable = &assignment_src[..equals_pos]; + let value = &assignment_src[equals_pos + 1..]; + result += wstr::from_char_slice( + wgettext_fmt!(ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, variable, value) + .as_char_slice(), + ); + } + _ => { + if skip_caret && self.text.is_empty() { + return L!("").to_owned(); + } + result += wstr::from_char_slice(self.text.as_char_slice()); + } + } + + let mut start = self.source_start; + let mut len = self.source_length; + if start >= src.len() { + // If we are past the source, we clamp it to the end. + start = src.len() - 1; + len = 0; + } + + if start + len > src.len() { + len = src.len() - self.source_start; + } + + if skip_caret { + return result; + } + + // Locate the beginning of this line of source. + let mut line_start = 0; + + // Look for a newline prior to source_start. If we don't find one, start at the beginning of + // the string; otherwise start one past the newline. Note that source_start may itself point + // at a newline; we want to find the newline before it. + if start > 0 { + let prefix = &src.as_char_slice()[..start]; + let newline_left_of_start = prefix.iter().rev().position(|c| *c == '\n'); + if let Some(left_of_start) = newline_left_of_start { + line_start = start - left_of_start; + } + } + // Look for the newline after the source range. If the source range itself includes a + // newline, that's the one we want, so start just before the end of the range. + let last_char_in_range = if len == 0 { start } else { start + len - 1 }; + let line_end = src.as_char_slice()[last_char_in_range..] 
+ .iter() + .position(|c| *c == '\n') + .map(|pos| pos + last_char_in_range) + .unwrap_or(src.len()); + + assert!(line_end >= line_start); + assert!(start >= line_start); + + // Don't include the caret and line if we're interactive and this is the first line, because + // then it's obvious. + let interactive_skip_caret = is_interactive && start == 0; + if interactive_skip_caret { + return result; + } + + // Append the line of text. + if !result.is_empty() { + result += "\n"; + } + result += wstr::from_char_slice(&src.as_char_slice()[line_start..line_end]); + + // Append the caret line. The input source may include tabs; for that reason we + // construct a "caret line" that has tabs in corresponding positions. + let mut caret_space_line = WString::new(); + caret_space_line.reserve(start - line_start); + for i in line_start..start { + let wc = src.as_char_slice()[i]; + if wc == '\t' { + caret_space_line += "\t"; + } else if wc == '\n' { + // It's possible that the start points at a newline itself. In that case, + // pretend it's a space. We only expect this to be at the end of the string. + caret_space_line += " "; + } else { + let width = fish_wcwidth(wc.into()).0; + if width > 0 { + caret_space_line += " ".repeat(width as usize).as_str(); + } + } + } + result += "\n"; + result += wstr::from_char_slice(caret_space_line.as_char_slice()); + result += "^"; + if len > 1 { + // Add a squiggle under the error location. + // We do it like this + // ^~~^ + // With a "^" under the start and end, and squiggles in-between. + let width = fish_wcswidth(unsafe { src.as_ptr().add(start) }, len).0; + if width >= 2 { + // Subtract one for each of the carets - this is important in case + // the starting char has a width of > 1. 
+ result += "~".repeat(width as usize - 2).as_str(); + result += "^"; + } + } + result + } +} + +impl From<&parse_error_t> for ParseError { + fn from(error: &parse_error_t) -> Self { + ParseError { + text: error.text.from_ffi(), + code: error.code, + source_start: error.source_start, + source_length: error.source_length, + } + } +} + +impl parse_error_t { + fn describe_with_prefix( + self: &parse_error_t, + src: &CxxWString, + prefix: &CxxWString, + is_interactive: bool, + skip_caret: bool, + ) -> UniquePtr { + ParseError::from(self).describe_with_prefix_ffi(src, prefix, is_interactive, skip_caret) + } +} + +impl ParseError { + fn code(&self) -> ParseErrorCode { + self.code + } + fn source_start(&self) -> usize { + self.source_start + } + fn text(&self) -> UniquePtr { + self.text.to_ffi() + } + + fn describe_ffi( + self: &ParseError, + src: &CxxWString, + is_interactive: bool, + ) -> UniquePtr { + self.describe(&src.from_ffi(), is_interactive).to_ffi() + } + + fn describe_with_prefix_ffi( + self: &ParseError, + src: &CxxWString, + prefix: &CxxWString, + is_interactive: bool, + skip_caret: bool, + ) -> UniquePtr { + self.describe_with_prefix( + &src.from_ffi(), + &prefix.from_ffi(), + is_interactive, + skip_caret, + ) + .to_ffi() + } +} + +#[widestrs] +pub fn token_type_user_presentable_description( + type_: ParseTokenType, + keyword: ParseKeyword, +) -> WString { + if keyword != ParseKeyword::none { + return sprintf!("keyword: '%ls'"L, Into::<&'static wstr>::into(keyword)); + } + match type_ { + ParseTokenType::string => "a string"L.to_owned(), + ParseTokenType::pipe => "a pipe"L.to_owned(), + ParseTokenType::redirection => "a redirection"L.to_owned(), + ParseTokenType::background => "a '&'"L.to_owned(), + ParseTokenType::andand => "'&&'"L.to_owned(), + ParseTokenType::oror => "'||'"L.to_owned(), + ParseTokenType::end => "end of the statement"L.to_owned(), + ParseTokenType::terminate => "end of the input"L.to_owned(), + ParseTokenType::error => "a parse 
error"L.to_owned(), + ParseTokenType::tokenizer_error => "an incomplete token"L.to_owned(), + ParseTokenType::comment => "a comment"L.to_owned(), + _ => sprintf!("a %ls"L, Into::<&'static wstr>::into(type_)), + } +} + +fn token_type_user_presentable_description_ffi( + type_: ParseTokenType, + keyword: ParseKeyword, +) -> UniquePtr { + token_type_user_presentable_description(type_, keyword).to_ffi() +} + +/// TODO This should be type alias once we drop the FFI. +pub struct ParseErrorList(Vec); + +/// Helper function to offset error positions by the given amount. This is used when determining +/// errors in a substring of a larger source buffer. +pub fn parse_error_offset_source_start(errors: &mut ParseErrorList, amt: usize) { + if amt > 0 { + for ref mut error in errors.0.iter_mut() { + // Preserve the special meaning of -1 as 'unknown'. + if error.source_start != SOURCE_LOCATION_UNKNOWN { + error.source_start += amt; + } + } + } +} + +fn new_parse_error_list() -> Box { + Box::new(ParseErrorList(Vec::new())) +} + +impl ParseErrorList { + fn offset_source_start_ffi(&mut self, amt: usize) { + parse_error_offset_source_start(self, amt) + } + + fn size(&self) -> usize { + self.0.len() + } + + fn at(&self, offset: usize) -> *const ParseError { + &self.0[offset] + } + + fn empty(&self) -> bool { + self.0.is_empty() + } + + fn push_back(&mut self, error: &parse_error_t) { + self.0.push(error.into()) + } + + fn append(&mut self, other: *mut ParseErrorList) { + self.0.append(&mut (unsafe { &*other }.0.clone())); + } + + fn erase(&mut self, index: usize) { + self.0.remove(index); + } + + fn clear(&mut self) { + self.0.clear() + } +} + +/// Maximum number of function calls. +pub const FISH_MAX_STACK_DEPTH: usize = 128; + +/// Maximum number of nested string substitutions (in lieu of evals) +/// Reduced under TSAN: our CI test creates 500 jobs and this is very slow with TSAN. 
+#[cfg(feature = "FISH_TSAN_WORKAROUNDS")] +pub const FISH_MAX_EVAL_DEPTH: usize = 250; +#[cfg(not(feature = "FISH_TSAN_WORKAROUNDS"))] +pub const FISH_MAX_EVAL_DEPTH: usize = 500; + +/// Error message on a function that calls itself immediately. +pub const INFINITE_FUNC_RECURSION_ERR_MSG: &str = + "The function '%ls' calls itself immediately, which would result in an infinite loop."; + +/// Error message on reaching maximum call stack depth. +pub const CALL_STACK_LIMIT_EXCEEDED_ERR_MSG: &str = + "The call stack limit has been exceeded. Do you have an accidental infinite loop?"; + +/// Error message when encountering an unknown builtin name. +pub const UNKNOWN_BUILTIN_ERR_MSG: &str = "Unknown builtin '%ls'"; + +/// Error message when encountering a failed expansion, e.g. for the variable name in for loops. +pub const FAILED_EXPANSION_VARIABLE_NAME_ERR_MSG: &str = "Unable to expand variable name '%ls'"; + +/// Error message when encountering an illegal file descriptor. +pub const ILLEGAL_FD_ERR_MSG: &str = "Illegal file descriptor in redirection '%ls'"; + +/// Error message for wildcards with no matches. +pub const WILDCARD_ERR_MSG: &str = "No matches for wildcard '%ls'. See `help wildcards-globbing`."; + +/// Error when using break outside of loop. +pub const INVALID_BREAK_ERR_MSG: &str = "'break' while not inside of loop"; + +/// Error when using continue outside of loop. +pub const INVALID_CONTINUE_ERR_MSG: &str = "'continue' while not inside of loop"; + +/// Error message when a command may not be in a pipeline. +pub const INVALID_PIPELINE_CMD_ERR_MSG: &str = "The '%ls' command can not be used in a pipeline"; + +// Error messages. The number is a reminder of how many format specifiers are contained. + +/// Error for $^. +pub const ERROR_BAD_VAR_CHAR1: &str = "$%lc is not a valid variable in fish."; + +/// Error for ${a}. +pub const ERROR_BRACKETED_VARIABLE1: &str = + "Variables cannot be bracketed. In fish, please use {$%ls}."; + +/// Error for "${a}". 
+pub const ERROR_BRACKETED_VARIABLE_QUOTED1: &str = + "Variables cannot be bracketed. In fish, please use \"$%ls\"."; + +/// Error issued on $?. +pub const ERROR_NOT_STATUS: &str = "$? is not the exit status. In fish, please use $status."; + +/// Error issued on $$. +pub const ERROR_NOT_PID: &str = "$$ is not the pid. In fish, please use $fish_pid."; + +/// Error issued on $#. +pub const ERROR_NOT_ARGV_COUNT: &str = "$# is not supported. In fish, please use 'count $argv'."; + +/// Error issued on $@. +pub const ERROR_NOT_ARGV_AT: &str = "$@ is not supported. In fish, please use $argv."; + +/// Error issued on $*. +pub const ERROR_NOT_ARGV_STAR: &str = "$* is not supported. In fish, please use $argv."; + +/// Error issued on $. +pub const ERROR_NO_VAR_NAME: &str = "Expected a variable name after this $."; + +/// Error message for Posix-style assignment: foo=bar. +pub const ERROR_BAD_COMMAND_ASSIGN_ERR_MSG: &str = + "Unsupported use of '='. In fish, please use 'set %ls %ls'."; + +/// Error message for a command like `time foo &`. +pub const ERROR_TIME_BACKGROUND: &str = + "'time' is not supported for background jobs. Consider using 'command time'."; + +/// Error issued on { echo; echo }. +pub const ERROR_NO_BRACE_GROUPING: &str = + "'{ ... }' is not supported for grouping commands. Please use 'begin; ...; end'"; diff --git a/fish-rust/src/redirection.rs b/fish-rust/src/redirection.rs new file mode 100644 index 000000000..06644868b --- /dev/null +++ b/fish-rust/src/redirection.rs @@ -0,0 +1,239 @@ +//! This file supports specifying and applying redirections. 
+ +use crate::wchar::L; +use crate::wchar_ffi::{wcharz_t, WCharToFFI, WString}; +use crate::wutil::fish_wcstoi; +use cxx::{CxxVector, CxxWString, SharedPtr, UniquePtr}; +use libc::{c_int, O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, O_WRONLY}; +use std::os::fd::RawFd; + +#[cxx::bridge] +mod redirection_ffi { + extern "C++" { + include!("wutil.h"); + type wcharz_t = super::wcharz_t; + } + + enum RedirectionMode { + overwrite, // normal redirection: > file.txt + append, // appending redirection: >> file.txt + input, // input redirection: < file.txt + fd, // fd redirection: 2>&1 + noclob, // noclobber redirection: >? file.txt + } + + extern "Rust" { + type RedirectionSpec; + + fn is_close(self: &RedirectionSpec) -> bool; + #[cxx_name = "get_target_as_fd"] + fn get_target_as_fd_ffi(self: &RedirectionSpec) -> SharedPtr; + fn oflags(self: &RedirectionSpec) -> i32; + + fn fd(self: &RedirectionSpec) -> i32; + fn mode(self: &RedirectionSpec) -> RedirectionMode; + fn target(self: &RedirectionSpec) -> UniquePtr; + fn new_redirection_spec( + fd: i32, + mode: RedirectionMode, + target: wcharz_t, + ) -> Box; + + type RedirectionSpecList; + fn new_redirection_spec_list() -> Box; + fn size(self: &RedirectionSpecList) -> usize; + fn at(self: &RedirectionSpecList, offset: usize) -> *const RedirectionSpec; + fn push_back(self: &mut RedirectionSpecList, spec: Box); + fn clone(self: &RedirectionSpecList) -> Box; + } + + /// A type that represents the action dup2(src, target). + /// If target is negative, this represents close(src). + /// Note none of the fds here are considered 'owned'. + #[derive(Clone, Copy)] + struct Dup2Action { + src: i32, + target: i32, + } + + /// A class representing a sequence of basic redirections. + struct Dup2List { + /// The list of actions. 
+ actions: Vec, + } + + extern "Rust" { + fn get_actions(self: &Dup2List) -> &Vec; + #[cxx_name = "dup2_list_resolve_chain"] + fn dup2_list_resolve_chain_ffi(io_chain: &CxxVector) -> Dup2List; + fn fd_for_target_fd(self: &Dup2List, target: i32) -> i32; + } +} + +pub use redirection_ffi::{Dup2Action, Dup2List, RedirectionMode}; + +impl RedirectionMode { + /// The open flags for this redirection mode. + pub fn oflags(self) -> Option { + match self { + RedirectionMode::append => Some(O_CREAT | O_APPEND | O_WRONLY), + RedirectionMode::overwrite => Some(O_CREAT | O_WRONLY | O_TRUNC), + RedirectionMode::noclob => Some(O_CREAT | O_EXCL | O_WRONLY), + RedirectionMode::input => Some(O_RDONLY), + _ => None, + } + } +} + +/// A struct which represents a redirection specification from the user. +/// Here the file descriptors don't represent open files - it's purely textual. +#[derive(Clone)] +pub struct RedirectionSpec { + /// The redirected fd, or -1 on overflow. + /// In the common case of a pipe, this is 1 (STDOUT_FILENO). + /// For example, in the case of "3>&1" this will be 3. + fd: RawFd, + + /// The redirection mode. + mode: RedirectionMode, + + /// The target of the redirection. + /// For example in "3>&1", this will be "1". + /// In "< file.txt" this will be "file.txt". + target: WString, +} + +impl RedirectionSpec { + /// \return if this is a close-type redirection. + pub fn is_close(&self) -> bool { + self.mode == RedirectionMode::fd && self.target == L!("-") + } + + /// Attempt to parse target as an fd. + pub fn get_target_as_fd(&self) -> Option { + fish_wcstoi(self.target.as_char_slice().iter().copied()).ok() + } + fn get_target_as_fd_ffi(&self) -> SharedPtr { + match self.get_target_as_fd() { + Some(fd) => SharedPtr::new(fd), + None => SharedPtr::null(), + } + } + + /// \return the open flags for this redirection. 
+ pub fn oflags(&self) -> c_int { + match self.mode.oflags() { + Some(flags) => flags, + None => panic!("Not a file redirection"), + } + } + + fn fd(&self) -> RawFd { + self.fd + } + + fn mode(&self) -> RedirectionMode { + self.mode + } + + fn target(&self) -> UniquePtr { + self.target.to_ffi() + } +} + +fn new_redirection_spec(fd: i32, mode: RedirectionMode, target: wcharz_t) -> Box { + Box::new(RedirectionSpec { + fd, + mode, + target: target.into(), + }) +} + +/// TODO This should be type alias once we drop the FFI. +pub struct RedirectionSpecList(Vec); + +fn new_redirection_spec_list() -> Box { + Box::new(RedirectionSpecList(Vec::new())) +} + +impl RedirectionSpecList { + fn size(&self) -> usize { + self.0.len() + } + fn at(&self, offset: usize) -> *const RedirectionSpec { + &self.0[offset] + } + #[allow(clippy::boxed_local)] + fn push_back(self: &mut RedirectionSpecList, spec: Box) { + self.0.push(*spec) + } + fn clone(self: &RedirectionSpecList) -> Box { + Box::new(RedirectionSpecList(self.0.clone())) + } +} + +/// Produce a dup_fd_list_t from an io_chain. This may not be called before fork(). +/// The result contains the list of fd actions (dup2 and close), as well as the list +/// of fds opened. +fn dup2_list_resolve_chain(io_chain: &Vec) -> Dup2List { + let mut result = Dup2List { actions: vec![] }; + for io in io_chain { + if io.src < 0 { + result.add_close(io.target) + } else { + result.add_dup2(io.src, io.target) + } + } + result +} + +fn dup2_list_resolve_chain_ffi(io_chain: &CxxVector) -> Dup2List { + dup2_list_resolve_chain(&io_chain.iter().cloned().collect()) +} + +impl Dup2List { + /// \return the list of dup2 actions. + fn get_actions(&self) -> &Vec { + &self.actions + } + + /// \return the fd ultimately dup'd to a target fd, or -1 if the target is closed. + /// For example, if target fd is 1, and we have a dup2 chain 5->3 and 3->1, then we will + /// return 5. If the target is not referenced in the chain, returns target. 
+ fn fd_for_target_fd(&self, target: RawFd) -> RawFd { + // Paranoia. + if target < 0 { + return target; + } + // Note we can simply walk our action list backwards, looking for src -> target dups. + let mut cursor = target; + for action in self.actions.iter().rev() { + if action.target == cursor { + // cursor is replaced by action.src + cursor = action.src; + } else if action.src == cursor && action.target < 0 { + // cursor is closed. + cursor = -1; + break; + } + } + cursor + } + + /// Append a dup2 action. + fn add_dup2(&mut self, src: RawFd, target: RawFd) { + assert!(src >= 0 && target >= 0, "Invalid fd in add_dup2"); + // Note: record these even if src and target is the same. + // This is a note that we must clear the CLO_EXEC bit. + self.actions.push(Dup2Action { src, target }); + } + + /// Append a close action. + fn add_close(&mut self, fd: RawFd) { + assert!(fd >= 0, "Invalid fd in add_close"); + self.actions.push(Dup2Action { + src: fd, + target: -1, + }) + } +} diff --git a/fish-rust/src/signal.rs b/fish-rust/src/signal.rs new file mode 100644 index 000000000..15a5a1bf3 --- /dev/null +++ b/fish-rust/src/signal.rs @@ -0,0 +1,67 @@ +use widestring::U32CStr; + +use crate::ffi; +use crate::topic_monitor::{generation_t, invalid_generations, topic_monitor_principal, topic_t}; +use crate::wchar_ffi::{c_str, wstr}; + +/// A sigint_detector_t can be used to check if a SIGINT (or SIGHUP) has been delivered. +pub struct sigchecker_t { + topic: topic_t, + gen: generation_t, +} + +impl sigchecker_t { + /// Create a new checker for the given topic. + pub fn new(topic: topic_t) -> sigchecker_t { + let mut res = sigchecker_t { topic, gen: 0 }; + // Call check() to update our generation. + res.check(); + res + } + + /// Create a new checker for SIGHUP and SIGINT. + pub fn new_sighupint() -> sigchecker_t { + Self::new(topic_t::sighupint) + } + + /// Check if a sigint has been delivered since the last call to check(), or since the detector + /// was created. 
+ pub fn check(&mut self) -> bool { + let tm = topic_monitor_principal(); + let gen = tm.generation_for_topic(self.topic); + let changed = self.gen != gen; + self.gen = gen; + changed + } + + /// Wait until a sigint is delivered. + pub fn wait(&self) { + let tm = topic_monitor_principal(); + let mut gens = invalid_generations(); + *gens.at_mut(self.topic) = self.gen; + tm.check(&mut gens, true /* wait */); + } +} + +/// Get the integer signal value representing the specified signal. +pub fn wcs2sig(s: &wstr) -> Option { + let sig = ffi::wcs2sig(c_str!(s)); + + sig.0.try_into().ok() +} + +/// Get string representation of a signal. +pub fn sig2wcs(sig: usize) -> &'static wstr { + let s = ffi::sig2wcs(i32::try_from(sig).expect("signal should be < 2^31").into()); + let s = unsafe { U32CStr::from_ptr_str(s) }; + + wstr::from_ucstr(s).expect("signal name should be valid utf-32") +} + +/// Returns a description of the specified signal. +pub fn signal_get_desc(sig: usize) -> &'static wstr { + let s = ffi::signal_get_desc(i32::try_from(sig).expect("signal should be < 2^31").into()); + let s = unsafe { U32CStr::from_ptr_str(s) }; + + wstr::from_ucstr(s).expect("signal description should be valid utf-32") +} diff --git a/fish-rust/src/smoke.rs b/fish-rust/src/smoke.rs new file mode 100644 index 000000000..853db4dc6 --- /dev/null +++ b/fish-rust/src/smoke.rs @@ -0,0 +1,26 @@ +#[cxx::bridge(namespace = rust)] +mod ffi { + extern "Rust" { + fn add(left: usize, right: usize) -> usize; + } +} + +pub fn add(left: usize, right: usize) -> usize { + left + right +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn it_works() { + let result = add(2, 2); + assert_eq!(result, 4); + } +} + +use crate::ffi_tests::add_test; +add_test!("test_add", || { + assert_eq!(add(2, 3), 5); +}); diff --git a/fish-rust/src/timer.rs b/fish-rust/src/timer.rs new file mode 100644 index 000000000..5dc17eb43 --- /dev/null +++ b/fish-rust/src/timer.rs @@ -0,0 +1,266 @@ +//! 
This module houses `TimerSnapshot` which can be used to calculate the elapsed time (system CPU +//! time, user CPU time, and observed wall time, broken down by fish and child processes spawned by +//! fish) between two `TimerSnapshot` instances. +//! +//! Measuring time is always complicated with many caveats. Quite apart from the typical +//! gotchas faced by developers attempting to choose between monotonic vs non-monotonic and system vs +//! cpu clocks, the fact that we are executing as a shell further complicates matters: we can't just +//! observe the elapsed CPU time, because that does not reflect the total execution time for both +//! ourselves (internal shell execution time and the time it takes for builtins and functions to +//! execute) and any external processes we spawn. +//! +//! `std::time::Instant` is used to monitor elapsed wall time. Unlike `SystemTime`, `Instant` is +//! guaranteed to be monotonic though it is likely to not be as high of a precision as we would like +//! but it's still the best we can do because we don't know how long of a time might elapse between +//! `TimerSnapshot` instances and need to avoid rollover. + +use std::io::Write; +use std::time::{Duration, Instant}; + +#[cxx::bridge] +mod timer_ffi { + extern "Rust" { + type PrintElapsedOnDropFfi; + #[cxx_name = "push_timer"] + fn push_timer_ffi(enabled: bool) -> Box; + } +} + +enum Unit { + Minutes, + Seconds, + Millis, + Micros, +} + +struct TimerSnapshot { + wall_time: Instant, + cpu_fish: libc::rusage, + cpu_children: libc::rusage, +} + +/// If `enabled`, create a `TimerSnapshot` and return a `PrintElapsedOnDrop` object that will print +/// upon being dropped the delta between now and the time that it is dropped at. Otherwise return +/// `None`. 
+pub fn push_timer(enabled: bool) -> Option { + if !enabled { + return None; + } + + Some(PrintElapsedOnDrop { + start: TimerSnapshot::take(), + }) +} + +/// cxx bridge does not support UniquePtr so we can't use a null UniquePtr to +/// represent a None, and cxx bridge does not support Box> so we need to make +/// our own wrapper type that incorporates the Some/None states directly into it. +enum PrintElapsedOnDropFfi { + Some(PrintElapsedOnDrop), + None, +} + +fn push_timer_ffi(enabled: bool) -> Box { + Box::new(match push_timer(enabled) { + Some(t) => PrintElapsedOnDropFfi::Some(t), + None => PrintElapsedOnDropFfi::None, + }) +} + +/// An enumeration of supported libc rusage types used by [`getrusage()`]. +/// NB: RUSAGE_THREAD is not supported on macOS. +enum RUsage { + RSelf, // "Self" is a reserved keyword + RChildren, +} + +/// A safe wrapper around `libc::getrusage()` +fn getrusage(resource: RUsage) -> libc::rusage { + let mut rusage = std::mem::MaybeUninit::uninit(); + let result = unsafe { + match resource { + RUsage::RSelf => libc::getrusage(libc::RUSAGE_SELF, rusage.as_mut_ptr()), + RUsage::RChildren => libc::getrusage(libc::RUSAGE_CHILDREN, rusage.as_mut_ptr()), + } + }; + + // getrusage(2) says the syscall can only fail if the dest address is invalid (EFAULT) or if the + // requested resource type is invalid. Since we're in control of both, we can assume it won't + // fail. In case it does anyway (e.g. OS where the syscall isn't implemented), we can just + // return an empty value. + match result { + 0 => unsafe { rusage.assume_init() }, + _ => unsafe { std::mem::zeroed() }, + } +} + +impl TimerSnapshot { + pub fn take() -> TimerSnapshot { + TimerSnapshot { + cpu_fish: getrusage(RUsage::RSelf), + cpu_children: getrusage(RUsage::RChildren), + wall_time: Instant::now(), + } + } + + /// Returns a formatted string containing the detailed difference between two `TimerSnapshot` + /// instances. 
The returned string can take one of two formats, depending on the value of the + /// `verbose` parameter. + pub fn get_delta(t1: &TimerSnapshot, t2: &TimerSnapshot, verbose: bool) -> String { + use crate::nix::timeval_to_duration as from; + + let mut fish_sys = from(&t2.cpu_fish.ru_stime) - from(&t1.cpu_fish.ru_stime); + let mut fish_usr = from(&t2.cpu_fish.ru_utime) - from(&t1.cpu_fish.ru_utime); + let mut child_sys = from(&t2.cpu_children.ru_stime) - from(&t1.cpu_children.ru_stime); + let mut child_usr = from(&t2.cpu_children.ru_utime) - from(&t1.cpu_children.ru_utime); + + // The result from getrusage is not necessarily realtime, it may be cached from a few + // microseconds ago. In the event that execution completes extremely quickly or there is + // no data (say, we are measuring external execution time but no external processes have + // been launched), it can incorrectly appear to be negative. + fish_sys = fish_sys.max(Duration::ZERO); + fish_usr = fish_usr.max(Duration::ZERO); + child_sys = child_sys.max(Duration::ZERO); + child_usr = child_usr.max(Duration::ZERO); + // As `Instant` is strictly monotonic, this can't be negative so we don't need to clamp. 
+ let net_wall_micros = (t2.wall_time - t1.wall_time).as_micros() as i64; + let net_sys_micros = (fish_sys + child_sys).as_micros() as i64; + let net_usr_micros = (fish_usr + child_usr).as_micros() as i64; + + let wall_unit = Unit::for_micros(net_wall_micros); + // Make sure we share the same unit for the various CPU times + let cpu_unit = Unit::for_micros(net_sys_micros.max(net_usr_micros)); + + let wall_time = wall_unit.convert_micros(net_wall_micros); + let sys_time = cpu_unit.convert_micros(net_sys_micros); + let usr_time = cpu_unit.convert_micros(net_usr_micros); + + let mut output = String::new(); + if !verbose { + output += &"\n_______________________________"; + output += &format!("\nExecuted in {:6.2} {}", wall_time, wall_unit.long_name()); + output += &format!("\n usr time {:6.2} {}", usr_time, cpu_unit.long_name()); + output += &format!("\n sys time {:6.2} {}", sys_time, cpu_unit.long_name()); + } else { + let fish_unit = Unit::for_micros(fish_sys.max(fish_usr).as_micros() as i64); + let child_unit = Unit::for_micros(child_sys.max(child_usr).as_micros() as i64); + let fish_usr_time = fish_unit.convert_micros(fish_usr.as_micros() as i64); + let fish_sys_time = fish_unit.convert_micros(fish_sys.as_micros() as i64); + let child_usr_time = child_unit.convert_micros(child_usr.as_micros() as i64); + let child_sys_time = child_unit.convert_micros(child_sys.as_micros() as i64); + + let column2_unit_len = wall_unit + .short_name() + .len() + .max(cpu_unit.short_name().len()); + let wall_unit = wall_unit.short_name(); + let cpu_unit = cpu_unit.short_name(); + let fish_unit = fish_unit.short_name(); + let child_unit = child_unit.short_name(); + + output += &"\n________________________________________________________"; + output += &format!( + "\nExecuted in {wall_time:6.2} {wall_unit: Unit { + match micros { + 900_000_001.. => Unit::Minutes, + // Move to seconds if we would overflow the %6.2 format + 999_995.. => Unit::Seconds, + 1000.. 
=> Unit::Millis, + _ => Unit::Micros, + } + } + + const fn short_name(&self) -> &'static str { + match self { + &Unit::Minutes => "mins", + &Unit::Seconds => "secs", + &Unit::Millis => "millis", + &Unit::Micros => "micros", + } + } + + const fn long_name(&self) -> &'static str { + match self { + &Unit::Minutes => "minutes", + &Unit::Seconds => "seconds", + &Unit::Millis => "milliseconds", + &Unit::Micros => "microseconds", + } + } + + fn convert_micros(&self, micros: i64) -> f64 { + match self { + &Unit::Minutes => micros as f64 / 1.0E6 / 60.0, + &Unit::Seconds => micros as f64 / 1.0E6, + &Unit::Millis => micros as f64 / 1.0E3, + &Unit::Micros => micros as f64 / 1.0, + } + } +} + +#[test] +fn timer_format_and_alignment() { + let mut t1 = TimerSnapshot::take(); + t1.cpu_fish.ru_utime.tv_usec = 0; + t1.cpu_fish.ru_stime.tv_usec = 0; + t1.cpu_children.ru_utime.tv_usec = 0; + t1.cpu_children.ru_stime.tv_usec = 0; + + let mut t2 = TimerSnapshot::take(); + t2.cpu_fish.ru_utime.tv_usec = 999995; + t2.cpu_fish.ru_stime.tv_usec = 999994; + t2.cpu_children.ru_utime.tv_usec = 1000; + t2.cpu_children.ru_stime.tv_usec = 500; + t2.wall_time = t1.wall_time + Duration::from_micros(500); + + let expected = r#" +________________________________________________________ +Executed in 500.00 micros fish external + usr time 1.00 secs 1.00 secs 1.00 millis + sys time 1.00 secs 1.00 secs 0.50 millis +"#; + // (a) (b) (c) + // (a) remaining columns should align even if there are different units + // (b) carry to the next unit when it would overflow %6.2F + // (c) carry to the next unit when the larger one exceeds 1000 + let actual = TimerSnapshot::get_delta(&t1, &t2, true); + assert_eq!(actual, expected); +} diff --git a/fish-rust/src/tokenizer.rs b/fish-rust/src/tokenizer.rs new file mode 100644 index 000000000..fc0e094e1 --- /dev/null +++ b/fish-rust/src/tokenizer.rs @@ -0,0 +1,1376 @@ +//! A specialized tokenizer for tokenizing the fish language. 
In the future, the tokenizer should be +//! extended to support marks, tokenizing multiple strings and disposing of unused string segments. + +use crate::ffi::{valid_var_name_char, wcharz_t}; +use crate::future_feature_flags::{feature_test, FeatureFlag}; +use crate::parse_constants::SOURCE_OFFSET_INVALID; +use crate::redirection::RedirectionMode; +use crate::wchar::{WExt, L}; +use crate::wchar_ffi::{wchar_t, wstr, WCharFromFFI, WCharToFFI, WString}; +use crate::wutil::wgettext; +use cxx::{CxxWString, SharedPtr, UniquePtr}; +use libc::{c_int, STDIN_FILENO, STDOUT_FILENO}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, Not}; +use widestring_suffix::widestrs; + +#[cxx::bridge] +mod tokenizer_ffi { + extern "C++" { + include!("wutil.h"); + include!("redirection.h"); + type wcharz_t = super::wcharz_t; + type RedirectionMode = super::RedirectionMode; + } + + /// Token types. XXX Why this isn't ParseTokenType, I'm not really sure. + enum TokenType { + /// Error reading token + error, + /// String token + string, + /// Pipe token + pipe, + /// && token + andand, + /// || token + oror, + /// End token (semicolon or newline, not literal end) + end, + /// redirection token + redirect, + /// send job to bg token + background, + /// comment token + comment, + } + + enum TokenizerError { + none, + unterminated_quote, + unterminated_subshell, + unterminated_slice, + unterminated_escape, + invalid_redirect, + invalid_pipe, + invalid_pipe_ampersand, + closing_unopened_subshell, + illegal_slice, + closing_unopened_brace, + unterminated_brace, + expected_pclose_found_bclose, + expected_bclose_found_pclose, + } + + extern "Rust" { + fn tokenizer_get_error_message(err: TokenizerError) -> UniquePtr; + } + + struct Tok { + // Offset of the token. + offset: u32, + // Length of the token. + length: u32, + + // If an error, this is the offset of the error within the token. A value of 0 means it occurred + // at 'offset'. 
+ error_offset_within_token: u32, + error_length: u32, + + // If an error, this is the error code. + error: TokenizerError, + + // The type of the token. + type_: TokenType, + } + // TODO static_assert(sizeof(Tok) <= 32, "Tok expected to be 32 bytes or less"); + + extern "Rust" { + fn location_in_or_at_end_of_source_range(self: &Tok, loc: usize) -> bool; + #[cxx_name = "get_source"] + fn get_source_ffi(self: &Tok, str: &CxxWString) -> UniquePtr; + } + + extern "Rust" { + type Tokenizer; + fn new_tokenizer(start: wcharz_t, flags: u8) -> Box; + #[cxx_name = "next"] + fn next_ffi(self: &mut Tokenizer) -> UniquePtr; + #[cxx_name = "text_of"] + fn text_of_ffi(self: &Tokenizer, tok: &Tok) -> UniquePtr; + #[cxx_name = "is_token_delimiter"] + fn is_token_delimiter_ffi(c: wchar_t, next: SharedPtr) -> bool; + } + + extern "Rust" { + #[cxx_name = "tok_command"] + fn tok_command_ffi(str: &CxxWString) -> UniquePtr; + } + + /// Struct wrapping up a parsed pipe or redirection. + struct PipeOrRedir { + // The redirected fd, or -1 on overflow. + // In the common case of a pipe, this is 1 (STDOUT_FILENO). + // For example, in the case of "3>&1" this will be 3. + fd: i32, + + // Whether we are a pipe (true) or redirection (false). + is_pipe: bool, + + // The redirection mode if the type is redirect. + // Ignored for pipes. + mode: RedirectionMode, + + // Whether, in addition to this redirection, stderr should also be dup'd to stdout + // For example &| or &> + stderr_merge: bool, + + // Number of characters consumed when parsing the string. 
+ consumed: usize, + } + + extern "Rust" { + fn pipe_or_redir_from_string(buff: wcharz_t) -> UniquePtr; + fn is_valid(self: &PipeOrRedir) -> bool; + fn oflags(self: &PipeOrRedir) -> i32; + fn token_type(self: &PipeOrRedir) -> TokenType; + } + + enum MoveWordStyle { + move_word_style_punctuation, // stop at punctuation + move_word_style_path_components, // stops at path components + move_word_style_whitespace, // stops at whitespace + } + + /// Our state machine that implements "one word" movement or erasure. + struct MoveWordStateMachine { + state: u8, + style: MoveWordStyle, + } + + extern "Rust" { + fn new_move_word_state_machine(syl: MoveWordStyle) -> Box; + #[cxx_name = "consume_char"] + fn consume_char_ffi(self: &mut MoveWordStateMachine, c: wchar_t) -> bool; + fn reset(self: &mut MoveWordStateMachine); + } + + extern "Rust" { + #[cxx_name = "variable_assignment_equals_pos"] + fn variable_assignment_equals_pos_ffi(txt: &CxxWString) -> SharedPtr; + } +} + +pub use tokenizer_ffi::{ + MoveWordStateMachine, MoveWordStyle, PipeOrRedir, Tok, TokenType, TokenizerError, +}; + +#[derive(Clone, Copy)] +pub struct TokFlags(u8); + +impl BitAnd for TokFlags { + type Output = bool; + fn bitand(self, rhs: Self) -> Self::Output { + (self.0 & rhs.0) != 0 + } +} +impl BitOr for TokFlags { + type Output = Self; + fn bitor(self, rhs: Self) -> Self::Output { + Self(self.0 | rhs.0) + } +} + +/// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching +/// parenthesis, etc. This is useful for tab-completion. +pub const TOK_ACCEPT_UNFINISHED: TokFlags = TokFlags(1); + +/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting. +pub const TOK_SHOW_COMMENTS: TokFlags = TokFlags(2); + +/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells +/// the tokenizer to return each of them as a separate END. 
+pub const TOK_SHOW_BLANK_LINES: TokFlags = TokFlags(4); + +/// Make an effort to continue after an error. +pub const TOK_CONTINUE_AFTER_ERROR: TokFlags = TokFlags(8); + +/// Get the error message for an error \p err. +fn tokenizer_get_error_message(err: TokenizerError) -> UniquePtr { + let s: &'static wstr = err.into(); + s.to_ffi() +} + +impl From for &'static wstr { + #[widestrs] + fn from(err: TokenizerError) -> Self { + match err { + TokenizerError::none => ""L, + TokenizerError::unterminated_quote => { + wgettext!("Unexpected end of string, quotes are not balanced") + } + TokenizerError::unterminated_subshell => { + wgettext!("Unexpected end of string, expecting ')'") + } + TokenizerError::unterminated_slice => { + wgettext!("Unexpected end of string, square brackets do not match") + } + TokenizerError::unterminated_escape => { + wgettext!("Unexpected end of string, incomplete escape sequence") + } + TokenizerError::invalid_redirect => { + wgettext!("Invalid input/output redirection") + } + TokenizerError::invalid_pipe => { + wgettext!("Cannot use stdin (fd 0) as pipe output") + } + TokenizerError::invalid_pipe_ampersand => { + wgettext!("|& is not valid. 
In fish, use &| to pipe both stdout and stderr.") + } + TokenizerError::closing_unopened_subshell => { + wgettext!("Unexpected ')' for unopened parenthesis") + } + TokenizerError::illegal_slice => { + wgettext!("Unexpected '[' at this location") + } + TokenizerError::closing_unopened_brace => { + wgettext!("Unexpected '}' for unopened brace expansion") + } + TokenizerError::unterminated_brace => { + wgettext!("Unexpected end of string, incomplete parameter expansion") + } + TokenizerError::expected_pclose_found_bclose => { + wgettext!("Unexpected '}' found, expecting ')'") + } + TokenizerError::expected_bclose_found_pclose => { + wgettext!("Unexpected ')' found, expecting '}'") + } + _ => { + panic!("Unexpected tokenizer error"); + } + } + } +} + +impl Tok { + fn new(r#type: TokenType) -> Tok { + Tok { + offset: 0, + length: 0, + error_offset_within_token: SOURCE_OFFSET_INVALID, + error_length: 0, + error: TokenizerError::none, + type_: r#type, + } + } + pub fn location_in_or_at_end_of_source_range(self: &Tok, loc: usize) -> bool { + let loc = loc as u32; + self.offset <= loc && loc - self.offset <= self.length + } + pub fn get_source<'a, 'b>(self: &'a Tok, str: &'b wstr) -> &'b wstr { + &str[self.offset as usize..(self.offset + self.length) as usize] + } + fn get_source_ffi(self: &Tok, str: &CxxWString) -> UniquePtr { + self.get_source(&str.from_ffi()).to_ffi() + } +} + +/// The tokenizer struct. +pub struct Tokenizer { + /// A pointer into the original string, showing where the next token begins. + token_cursor: usize, + /// The start of the original string. + start: WString, // TODO Avoid copying once we drop the FFI. + /// Whether we have additional tokens. + has_next: bool, + /// Whether incomplete tokens are accepted. + accept_unfinished: bool, + /// Whether comments should be returned. + show_comments: bool, + /// Whether all blank lines are returned. + show_blank_lines: bool, + /// Whether to attempt to continue after an error. 
+ continue_after_error: bool, + /// Whether to continue the previous line after the comment. + continue_line_after_comment: bool, +} + +impl Tokenizer { + /// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and + /// should not be freed by the caller until after the tokenizer is destroyed. + /// + /// \param start The string to tokenize + /// \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer + /// to accept incomplete tokens, such as a subshell without a closing parenthesis, as a valid + /// token. Setting TOK_SHOW_COMMENTS will return comments as tokens + fn new(start: &wstr, flags: TokFlags) -> Self { + Tokenizer { + token_cursor: 0, + start: start.to_owned(), + has_next: true, + accept_unfinished: flags & TOK_ACCEPT_UNFINISHED, + show_comments: flags & TOK_SHOW_COMMENTS, + show_blank_lines: flags & TOK_SHOW_BLANK_LINES, + continue_after_error: flags & TOK_CONTINUE_AFTER_ERROR, + continue_line_after_comment: false, + } + } +} + +fn new_tokenizer(start: wcharz_t, flags: u8) -> Box { + Box::new(Tokenizer::new(start.into(), TokFlags(flags))) +} + +impl Tokenizer { + /// Returns the next token, or none if we are at the end. + pub fn next(&mut self) -> Option { + // TODO Implement IntoIterator. + if !self.has_next { + return None; + } + + // Consume non-newline whitespace. If we get an escaped newline, mark it and continue past + // it. + loop { + let i = self.token_cursor; + if self.start.get(i..i + 2) == Some(L!("\\\n")) { + self.token_cursor += 2; + self.continue_line_after_comment = true; + } else if i < self.start.len() && iswspace_not_nl(self.start.char_at(i)) { + self.token_cursor += 1; + } else { + break; + } + } + + while self.start.char_at(self.token_cursor) == '#' { + // We have a comment, walk over the comment. 
+ let comment_start = self.token_cursor; + self.token_cursor = comment_end(&self.start, self.token_cursor); + let comment_len = self.token_cursor - comment_start; + + // If we are going to continue after the comment, skip any trailing newline. + if self.start.as_char_slice().get(self.token_cursor) == Some(&'\n') + && self.continue_line_after_comment + { + self.token_cursor += 1; + } + + // Maybe return the comment. + if self.show_comments { + let mut result = Tok::new(TokenType::comment); + result.offset = comment_start as u32; + result.length = comment_len as u32; + return Some(result); + } + + while self.token_cursor < self.start.len() + && iswspace_not_nl(self.start.char_at(self.token_cursor)) + { + self.token_cursor += 1; + } + } + + // We made it past the comments and ate any trailing newlines we wanted to ignore. + self.continue_line_after_comment = false; + let start_pos = self.token_cursor; + + let this_char = self.start.char_at(self.token_cursor); + let next_char = self + .start + .as_char_slice() + .get(self.token_cursor + 1) + .copied(); + let buff = &self.start[self.token_cursor..]; + match this_char { + '\0'=> { + self.has_next = false; + None + } + '\r'| // carriage-return + '\n'| // newline + ';'=> { + let mut result = Tok::new(TokenType::end); + result.offset = start_pos as u32; + result.length = 1; + self.token_cursor+=1; + // Hack: when we get a newline, swallow as many as we can. This compresses multiple + // subsequent newlines into a single one. + if !self.show_blank_lines { + while self.token_cursor < self.start.len() { + let c = self.start.char_at(self.token_cursor); + if c != '\n' && c != '\r' && c != ' ' && c != '\t' { + break + } + self.token_cursor+=1; + } + } + Some(result) + } + '&'=> { + if next_char == Some('&') { + // && is and. 
+ let mut result = Tok::new(TokenType::andand); + result.offset = start_pos as u32; + result.length = 2; + self.token_cursor += 2; + Some(result) + } else if next_char == Some('>') || next_char == Some('|') { + // &> and &| redirect both stdout and stderr. + let redir = PipeOrRedir::try_from(buff). + expect("Should always succeed to parse a &> or &| redirection"); + let mut result = Tok::new(redir.token_type()); + result.offset = start_pos as u32; + result.length = redir.consumed as u32; + self.token_cursor += redir.consumed; + Some(result) + } else { + let mut result = Tok::new(TokenType::background); + result.offset = start_pos as u32; + result.length = 1; + self.token_cursor+=1; + Some(result) + } + } + '|'=> { + if next_char == Some('|') { + // || is or. + let mut result=Tok::new(TokenType::oror); + result.offset = start_pos as u32; + result.length = 2; + self.token_cursor += 2; + Some(result) + } else if next_char == Some('&') { + // |& is a bashism; in fish it's &|. + Some(self.call_error(TokenizerError::invalid_pipe_ampersand, + self.token_cursor, self.token_cursor, Some(2), 2)) + } else { + let pipe = PipeOrRedir::try_from(buff). + expect("Should always succeed to parse a | pipe"); + let mut result = Tok::new(pipe.token_type()); + result.offset = start_pos as u32; + result.length = pipe.consumed as u32; + self.token_cursor += pipe.consumed; + Some(result) + } + } + '>'| '<' => { + // There's some duplication with the code in the default case below. The key + // difference here is that we must never parse these as a string; a failed + // redirection is an error! 
+ match PipeOrRedir::try_from(buff) {
+ Ok(redir_or_pipe) => {
+ if redir_or_pipe.fd < 0 {
+ Some(self.call_error(TokenizerError::invalid_redirect, self.token_cursor,
+ self.token_cursor,
+ Some(redir_or_pipe.consumed),
+ redir_or_pipe.consumed))
+ } else {
+ let mut result = Tok::new(redir_or_pipe.token_type());
+ result.offset = start_pos as u32;
+ result.length = redir_or_pipe.consumed as u32;
+ self.token_cursor += redir_or_pipe.consumed;
+ Some(result)
+ }
+ }
+ Err(()) => Some(self.call_error(TokenizerError::invalid_redirect, self.token_cursor,
+ self.token_cursor,
+ Some(0),
+ 0))
+ }
+ }
+ _ => {
+ // Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string.
+ let error_location = self.token_cursor;
+ let redir_or_pipe = if this_char.is_ascii_digit() {
+ PipeOrRedir::try_from(buff).ok()
+ } else {
+ None
+ };
+
+ match redir_or_pipe {
+ Some(redir_or_pipe) => {
+ // It looks like a redirection or a pipe. But we don't support piping fd 0. Note
+ // that fd 0 may be -1, indicating overflow; but we don't treat that as a
+ // tokenizer error.
+ if redir_or_pipe.is_pipe && redir_or_pipe.fd == 0 {
+ Some(self.call_error(TokenizerError::invalid_pipe, error_location,
+ error_location, Some(redir_or_pipe.consumed),
+ redir_or_pipe.consumed))
+ }
+ else {
+ let mut result = Tok::new(redir_or_pipe.token_type());
+ result.offset = start_pos as u32;
+ result.length = redir_or_pipe.consumed as u32;
+ self.token_cursor += redir_or_pipe.consumed;
+ Some(result)
+ }
+ }
+ None => {
+ // Not a redirection or pipe, so just a string.
+ Some(self.read_string())
+ }
+ }
+ }
+ }
+ }
+ fn next_ffi(&mut self) -> UniquePtr {
+ match self.next() {
+ Some(tok) => UniquePtr::new(tok),
+ None => UniquePtr::null(),
+ }
+ }
+}
+
+/// Test if a character is whitespace. Differs from iswspace in that it does not consider a
+/// newline to be whitespace. 
+fn iswspace_not_nl(c: char) -> bool { + match c { + ' ' | '\t' | '\r' => true, + '\n' => false, + _ => c.is_whitespace(), + } +} + +impl Tokenizer { + /// Returns the text of a token, as a string. + pub fn text_of(&self, tok: &Tok) -> &wstr { + tok.get_source(&self.start) + } + fn text_of_ffi(&self, tok: &Tok) -> UniquePtr { + self.text_of(tok).to_ffi() + } + + /// Return an error token and mark that we no longer have a next token. + fn call_error( + &mut self, + error_type: TokenizerError, + token_start: usize, + error_loc: usize, + token_length: Option, + error_len: usize, + ) -> Tok { + assert!( + error_type != TokenizerError::none, + "TokenizerError::none passed to call_error" + ); + assert!(error_loc >= token_start, "Invalid error location"); + assert!(self.token_cursor >= token_start, "Invalid buff location"); + + // If continue_after_error is set and we have a real token length, then skip past it. + // Otherwise give up. + match token_length { + Some(token_length) if self.continue_after_error => { + assert!( + self.token_cursor < error_loc + token_length, + "Unable to continue past error" + ); + self.token_cursor = error_loc + token_length; + } + _ => self.has_next = false, + } + + Tok { + offset: token_start as u32, + length: token_length.unwrap_or(self.token_cursor - token_start) as u32, + error_offset_within_token: (error_loc - token_start) as u32, + error_length: error_len as u32, + error: error_type, + type_: TokenType::error, + } + } +} + +impl Tokenizer { + /// Read the next token as a string. 
+ fn read_string(&mut self) -> Tok { + let mut mode = TOK_MODE_REGULAR_TEXT; + let mut paran_offsets = vec![]; + let mut brace_offsets = vec![]; + let mut expecting = vec![]; + let mut quoted_cmdsubs = vec![]; + let mut slice_offset = 0; + let buff_start = self.token_cursor; + let mut is_token_begin = true; + + fn process_opening_quote( + this: &mut Tokenizer, + quoted_cmdsubs: &mut Vec, + paran_offsets: &mut Vec, + quote: char, + ) -> Result<(), usize> { + if let Some(end) = quote_end(&this.start, this.token_cursor, quote) { + if this.start.char_at(end) == '$' { + quoted_cmdsubs.push(paran_offsets.len()); + } + this.token_cursor = end; + Ok(()) + } else { + let error_loc = this.token_cursor; + this.token_cursor = this.start.len(); + Err(error_loc) + } + } + + while self.token_cursor != self.start.len() { + let c = self.start.char_at(self.token_cursor); + + // Make sure this character isn't being escaped before anything else + if mode & TOK_MODE_CHAR_ESCAPE { + mode &= !TOK_MODE_CHAR_ESCAPE; + // and do nothing more + } else if myal(c) { + // Early exit optimization in case the character is just a letter, + // which has no special meaning to the tokenizer, i.e. the same mode continues. + } + // Now proceed with the evaluation of the token, first checking to see if the token + // has been explicitly ignored (escaped). 
+ else if c == '\\' { + mode |= TOK_MODE_CHAR_ESCAPE; + } else if c == '#' && is_token_begin { + self.token_cursor = comment_end(&self.start, self.token_cursor) - 1; + } else if c == '(' { + paran_offsets.push(self.token_cursor); + expecting.push(')'); + mode |= TOK_MODE_SUBSHELL; + } else if c == '{' { + brace_offsets.push(self.token_cursor); + expecting.push('}'); + mode |= TOK_MODE_CURLY_BRACES; + } else if c == ')' { + if expecting.last() == Some(&'}') { + return self.call_error( + TokenizerError::expected_bclose_found_pclose, + self.token_cursor, + self.token_cursor, + Some(1), + 1, + ); + } + if paran_offsets.is_empty() { + return self.call_error( + TokenizerError::closing_unopened_subshell, + self.token_cursor, + self.token_cursor, + Some(1), + 1, + ); + } + paran_offsets.pop(); + if paran_offsets.is_empty() { + mode &= !TOK_MODE_SUBSHELL; + } + expecting.pop(); + // Check if the ) completed a quoted command substitution. + if quoted_cmdsubs.last() == Some(¶n_offsets.len()) { + quoted_cmdsubs.pop(); + // The "$(" part of a quoted command substitution closes double quotes. To keep + // quotes balanced, act as if there was an invisible double quote after the ")". 
+ if let Err(error_loc) = + process_opening_quote(self, &mut quoted_cmdsubs, &mut paran_offsets, '"') + { + if !self.accept_unfinished { + return self.call_error( + TokenizerError::unterminated_quote, + buff_start, + error_loc, + None, + 0, + ); + } + break; + } + } + } else if c == '}' { + if expecting.last() == Some(&')') { + return self.call_error( + TokenizerError::expected_pclose_found_bclose, + self.token_cursor, + self.token_cursor, + Some(1), + 1, + ); + } + if brace_offsets.is_empty() { + return self.call_error( + TokenizerError::closing_unopened_brace, + self.token_cursor, + self.start.len(), + None, + 0, + ); + } + brace_offsets.pop(); + if brace_offsets.is_empty() { + mode &= !TOK_MODE_CURLY_BRACES; + } + expecting.pop(); + } else if c == '[' { + if self.token_cursor != buff_start { + mode |= TOK_MODE_ARRAY_BRACKETS; + slice_offset = self.token_cursor; + } else { + // This is actually allowed so the test operator `[` can be used as the head of a + // command + } + } + // Only exit bracket mode if we are in bracket mode. + // Reason: `]` can be a parameter, e.g. last parameter to `[` test alias. + // e.g. 
echo $argv[([ $x -eq $y ])] # must not end bracket mode on first bracket + else if c == ']' && (mode & TOK_MODE_ARRAY_BRACKETS) { + mode &= !TOK_MODE_ARRAY_BRACKETS; + } else if c == '\'' || c == '"' { + if let Err(error_loc) = + process_opening_quote(self, &mut quoted_cmdsubs, &mut paran_offsets, c) + { + if !self.accept_unfinished { + return self.call_error( + TokenizerError::unterminated_quote, + buff_start, + error_loc, + None, + 1, + ); + } + break; + } + } else if mode == TOK_MODE_REGULAR_TEXT + && !tok_is_string_character( + c, + self.start + .as_char_slice() + .get(self.token_cursor + 1) + .copied(), + ) + { + break; + } + + let next = self + .start + .as_char_slice() + .get(self.token_cursor + 1) + .copied(); + is_token_begin = is_token_delimiter(c, next); + self.token_cursor += 1; + } + + if !self.accept_unfinished && mode != TOK_MODE_REGULAR_TEXT { + // These are all "unterminated", so the only char we can mark as an error + // is the opener (the closing char could be anywhere!) 
+ // + // (except for TOK_MODE_CHAR_ESCAPE, which is one long by definition) + if mode & TOK_MODE_CHAR_ESCAPE { + return self.call_error( + TokenizerError::unterminated_escape, + buff_start, + self.token_cursor - 1, + None, + 1, + ); + } else if mode & TOK_MODE_ARRAY_BRACKETS { + return self.call_error( + TokenizerError::unterminated_slice, + buff_start, + slice_offset, + None, + 1, + ); + } else if mode & TOK_MODE_SUBSHELL { + assert!(!paran_offsets.is_empty()); + let offset_of_open_paran = *paran_offsets.last().unwrap(); + + return self.call_error( + TokenizerError::unterminated_subshell, + buff_start, + offset_of_open_paran, + None, + 1, + ); + } else if mode & TOK_MODE_CURLY_BRACES { + assert!(!brace_offsets.is_empty()); + let offset_of_open_brace = *brace_offsets.last().unwrap(); + + return self.call_error( + TokenizerError::unterminated_brace, + buff_start, + offset_of_open_brace, + None, + 1, + ); + } else { + panic!("Unknown non-regular-text mode"); + } + } + + let mut result = Tok::new(TokenType::string); + result.offset = buff_start as u32; + result.length = (self.token_cursor - buff_start) as u32; + result + } +} + +pub fn quote_end(s: &wstr, mut pos: usize, quote: char) -> Option { + loop { + pos += 1; + + if pos == s.len() { + return None; + } + + let c = s.char_at(pos); + if c == '\\' { + pos += 1; + if pos == s.len() { + return None; + } + } else if c == quote || + // Command substitutions also end a double quoted string. This is how we + // support command substitutions inside double quotes. + (quote == '"' && c == '$' && s.as_char_slice().get(pos+1) == Some(&'(')) + { + return Some(pos); + } + } +} + +pub fn comment_end(s: &wstr, mut pos: usize) -> usize { + loop { + pos += 1; + if pos == s.len() || s.char_at(pos) == '\n' { + return pos; + } + } +} + +/// Tests if this character can be a part of a string. Hash (#) starts a comment if it's the first +/// character in a token; otherwise it is considered a string character. See issue #953. 
+fn tok_is_string_character(c: char, next: Option) -> bool { + match c { + // Unconditional separators. + '\0' | ' ' | '\n' | '|' | '\t' | ';' | '\r' | '<' | '>' => false, + '&' => { + if feature_test(FeatureFlag::ampersand_nobg_in_token) { + // Unlike in other shells, '&' is not special if followed by a string character. + next.map(|nc| tok_is_string_character(nc, None)) + .unwrap_or(false) + } else { + false + } + } + _ => true, + } +} + +/// Quick test to catch the most common 'non-magical' characters, makes read_string slightly faster +/// by adding a fast path for the most common characters. This is obviously not a suitable +/// replacement for iswalpha. +fn myal(c: char) -> bool { + ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) +} + +#[derive(Clone, Copy, PartialEq, Eq)] +struct TokModes(u8); + +const TOK_MODE_REGULAR_TEXT: TokModes = TokModes(0); // regular text +const TOK_MODE_SUBSHELL: TokModes = TokModes(1 << 0); // inside of subshell parentheses +const TOK_MODE_ARRAY_BRACKETS: TokModes = TokModes(1 << 1); // inside of array brackets +const TOK_MODE_CURLY_BRACES: TokModes = TokModes(1 << 2); +const TOK_MODE_CHAR_ESCAPE: TokModes = TokModes(1 << 3); + +impl BitAnd for TokModes { + type Output = bool; + fn bitand(self, rhs: Self) -> Self::Output { + (self.0 & rhs.0) != 0 + } +} +impl BitAndAssign for TokModes { + fn bitand_assign(&mut self, rhs: Self) { + self.0 &= rhs.0 + } +} +impl BitOrAssign for TokModes { + fn bitor_assign(&mut self, rhs: Self) { + self.0 |= rhs.0 + } +} +impl Not for TokModes { + type Output = TokModes; + fn not(self) -> Self::Output { + TokModes(!self.0) + } +} + +/// Tests if this character can delimit tokens. 
+pub fn is_token_delimiter(c: char, next: Option) -> bool {
+ c == '(' || !tok_is_string_character(c, next)
+}
+
+fn is_token_delimiter_ffi(c: wchar_t, next: SharedPtr) -> bool {
+ is_token_delimiter(
+ c.try_into().unwrap(),
+ next.as_ref().map(|c| (*c).try_into().unwrap()),
+ )
+}
+
+/// \return the first token from the string, skipping variable assignments like A=B.
+pub fn tok_command(str: &wstr) -> WString {
+ let mut t = Tokenizer::new(str, TokFlags(0));
+ while let Some(token) = t.next() {
+ if token.type_ != TokenType::string {
+ return WString::new();
+ }
+ let text = t.text_of(&token);
+ if variable_assignment_equals_pos(text).is_some() {
+ continue;
+ }
+ return text.to_owned();
+ }
+ WString::new()
+}
+fn tok_command_ffi(str: &CxxWString) -> UniquePtr {
+ tok_command(&str.from_ffi()).to_ffi()
+}
+
+impl TryFrom<&wstr> for PipeOrRedir {
+ type Error = ();
+
+ /// Examples of supported syntaxes.
+ /// Note we are only responsible for parsing the redirection part, not 'cmd' or 'file'.
+ ///
+ /// cmd | cmd normal pipe
+ /// cmd &| cmd normal pipe plus stderr-merge
+ /// cmd >| cmd pipe with explicit fd
+ /// cmd 2>| cmd pipe with explicit fd
+ /// cmd < file stdin redirection
+ /// cmd > file redirection
+ /// cmd >> file appending redirection
+ /// cmd >? file noclobber redirection
+ /// cmd >>? file appending noclobber redirection
+ /// cmd 2> file file redirection with explicit fd
+ /// cmd >&2 fd redirection with no explicit src fd (stdout is used)
+ /// cmd 1>&2 fd redirection with an explicit src fd
+ /// cmd <&2 fd redirection with no explicit src fd (stdin is used)
+ /// cmd 3<&0 fd redirection with an explicit src fd
+ /// cmd &> file redirection with stderr merge
+ /// cmd ^ file caret (stderr) redirection, perhaps disabled via feature flags
+ /// cmd ^^ file caret (stderr) redirection, perhaps disabled via feature flags
+ fn try_from(buff: &wstr) -> Result {
+ // Extract a range of leading fd. 
+ let mut cursor = buff.chars().take_while(|c| c.is_ascii_digit()).count(); + let fd_buff = &buff[..cursor]; + let has_fd = !fd_buff.is_empty(); + + // Try consuming a given character. + // Return true if consumed. On success, advances cursor. + let try_consume = |cursor: &mut usize, c| -> bool { + if buff.char_at(*cursor) != c { + false + } else { + *cursor += 1; + true + } + }; + + // Like try_consume, but asserts on failure. + let consume = |cursor: &mut usize, c| { + assert!(buff.char_at(*cursor) == c, "Failed to consume char"); + *cursor += 1; + }; + + let c = buff.char_at(cursor); + let mut result = PipeOrRedir { + fd: -1, + is_pipe: false, + mode: RedirectionMode::overwrite, + stderr_merge: false, + consumed: 0, + }; + match c { + '|' => { + if has_fd { + // Like 123| + return Err(()); + } + consume(&mut cursor, '|'); + assert!( + buff.char_at(cursor) != '|', + "|| passed as redirection, this should have been handled as 'or' by the caller" + ); + result.fd = STDOUT_FILENO; + result.is_pipe = true; + } + '>' => { + consume(&mut cursor, '>'); + if try_consume(&mut cursor, '>') { + result.mode = RedirectionMode::append; + } + if try_consume(&mut cursor, '|') { + // Note we differ from bash here. + // Consider `echo foo 2>| bar` + // In fish, this is a *pipe*. Run bar as a command and attach foo's stderr to bar's + // stdin, while leaving stdout as tty. + // In bash, this is a *redirection* to bar as a file. It is like > but ignores + // noclobber. + result.is_pipe = true; + result.fd = if has_fd { + parse_fd(fd_buff) // like 2>| + } else { + STDOUT_FILENO + }; // like >| + } else if try_consume(&mut cursor, '&') { + // This is a redirection to an fd. + // Note that we allow ">>&", but it's still just writing to the fd - "appending" to + // it doesn't make sense. + result.mode = RedirectionMode::fd; + result.fd = if has_fd { + parse_fd(fd_buff) // like 1>&2 + } else { + STDOUT_FILENO // like >&2 + }; + } else { + // This is a redirection to a file. 
+ result.fd = if has_fd { + parse_fd(fd_buff) // like 1> file.txt + } else { + STDOUT_FILENO // like > file.txt + }; + if result.mode != RedirectionMode::append { + result.mode = RedirectionMode::overwrite; + } + // Note 'echo abc >>? file' is valid: it means append and noclobber. + // But here "noclobber" means the file must not exist, so appending + // can be ignored. + if try_consume(&mut cursor, '?') { + result.mode = RedirectionMode::noclob; + } + } + } + '<' => { + consume(&mut cursor, '<'); + if try_consume(&mut cursor, '&') { + result.mode = RedirectionMode::fd; + } else { + result.mode = RedirectionMode::input; + } + result.fd = if has_fd { + parse_fd(fd_buff) // like 1<&3 or 1< /tmp/file.txt + } else { + STDIN_FILENO // like <&3 or < /tmp/file.txt + }; + } + '&' => { + consume(&mut cursor, '&'); + if try_consume(&mut cursor, '|') { + // &| is pipe with stderr merge. + result.fd = STDOUT_FILENO; + result.is_pipe = true; + result.stderr_merge = true; + } else if try_consume(&mut cursor, '>') { + result.fd = STDOUT_FILENO; + result.stderr_merge = true; + result.mode = RedirectionMode::overwrite; + if try_consume(&mut cursor, '>') { + result.mode = RedirectionMode::append; // like &>> + } + if try_consume(&mut cursor, '?') { + result.mode = RedirectionMode::noclob; // like &>? or &>>? + } + } else { + return Err(()); + } + } + _ => { + // Not a redirection. + return Err(()); + } + } + + result.consumed = cursor; + assert!( + result.consumed > 0, + "Should have consumed at least one character on success" + ); + Ok(result) + } +} + +fn pipe_or_redir_from_string(buff: wcharz_t) -> UniquePtr { + match PipeOrRedir::try_from(Into::<&wstr>::into(buff)) { + Ok(p) => UniquePtr::new(p), + Err(()) => UniquePtr::null(), + } +} + +impl PipeOrRedir { + /// \return the oflags (as in open(2)) for this redirection. + pub fn oflags(&self) -> c_int { + self.mode.oflags().unwrap_or(-1) + } + + // \return if we are "valid". 
Here "valid" means only that the source fd did not overflow. + // For example 99999999999> is invalid. + fn is_valid(&self) -> bool { + self.fd >= 0 + } + + // \return the token type for this redirection. + fn token_type(&self) -> TokenType { + if self.is_pipe { + TokenType::pipe + } else { + TokenType::redirect + } + } +} + +// Parse an fd from the non-empty string [start, end), all of which are digits. +// Return the fd, or -1 on overflow. +fn parse_fd(s: &wstr) -> i32 { + assert!(!s.is_empty()); + let mut big_fd: usize = 0; + for c in s.chars() { + assert!(c.is_ascii_digit()); + big_fd = big_fd * 10 + (c.to_digit(10).unwrap() as usize); + if big_fd > (i32::MAX as usize) { + return -1; + } + } + assert!(big_fd <= (i32::MAX as usize), "big_fd should be in range"); + big_fd as i32 +} + +fn new_move_word_state_machine(syl: MoveWordStyle) -> Box { + Box::new(MoveWordStateMachine::new(syl)) +} + +impl MoveWordStateMachine { + pub fn new(style: MoveWordStyle) -> Self { + MoveWordStateMachine { state: 0, style } + } + + pub fn consume_char(&mut self, c: char) -> bool { + match self.style { + MoveWordStyle::move_word_style_punctuation => self.consume_char_punctuation(c), + MoveWordStyle::move_word_style_path_components => self.consume_char_path_components(c), + MoveWordStyle::move_word_style_whitespace => self.consume_char_whitespace(c), + _ => panic!(), + } + } + pub fn consume_char_ffi(&mut self, c: wchar_t) -> bool { + self.consume_char(c.try_into().unwrap()) + } + + pub fn reset(&mut self) { + self.state = 0; + } + + fn consume_char_punctuation(&mut self, c: char) -> bool { + const S_ALWAYS_ONE: u8 = 0; + const S_REST: u8 = 1; + const S_WHITESPACE_REST: u8 = 2; + const S_WHITESPACE: u8 = 3; + const S_ALPHANUMERIC: u8 = 4; + const S_END: u8 = 5; + + let mut consumed = false; + while self.state != S_END && !consumed { + match self.state { + S_ALWAYS_ONE => { + // Always consume the first character. 
+ consumed = true; + if c.is_whitespace() { + self.state = S_WHITESPACE; + } else if c.is_alphanumeric() { + self.state = S_ALPHANUMERIC; + } else { + // Don't allow switching type (ws->nonws) after non-whitespace and + // non-alphanumeric. + self.state = S_REST; + } + } + S_REST => { + if c.is_whitespace() { + // Consume only trailing whitespace. + self.state = S_WHITESPACE_REST; + } else if c.is_alphanumeric() { + // Consume only alnums. + self.state = S_ALPHANUMERIC; + } else { + consumed = false; + self.state = S_END; + } + } + S_WHITESPACE_REST | S_WHITESPACE => { + // "whitespace" consumes whitespace and switches to alnums, + // "whitespace_rest" only consumes whitespace. + if c.is_whitespace() { + // Consumed whitespace. + consumed = true; + } else { + self.state = if self.state == S_WHITESPACE { + S_ALPHANUMERIC + } else { + S_END + }; + } + } + S_ALPHANUMERIC => { + if c.is_alphanumeric() { + consumed = true; // consumed alphanumeric + } else { + self.state = S_END; + } + } + _ => {} + } + } + consumed + } + + fn consume_char_path_components(&mut self, c: char) -> bool { + const S_INITIAL_PUNCTUATION: u8 = 0; + const S_WHITESPACE: u8 = 1; + const S_SEPARATOR: u8 = 2; + const S_SLASH: u8 = 3; + const S_PATH_COMPONENT_CHARACTERS: u8 = 4; + const S_INITIAL_SEPARATOR: u8 = 5; + const S_END: u8 = 6; + + let mut consumed = false; + while self.state != S_END && !consumed { + match self.state { + S_INITIAL_PUNCTUATION => { + if !is_path_component_character(c) && !c.is_whitespace() { + self.state = S_INITIAL_SEPARATOR; + } else { + if !is_path_component_character(c) { + consumed = true; + } + self.state = S_WHITESPACE; + } + } + S_WHITESPACE => { + if c.is_whitespace() { + consumed = true; // consumed whitespace + } else if c == '/' || is_path_component_character(c) { + self.state = S_SLASH; // path component + } else { + self.state = S_SEPARATOR; // path separator + } + } + S_SEPARATOR => { + if !c.is_whitespace() && !is_path_component_character(c) { + consumed = 
true; // consumed separator + } else { + self.state = S_END; + } + } + S_SLASH => { + if c == '/' { + consumed = true; // consumed slash + } else { + self.state = S_PATH_COMPONENT_CHARACTERS; + } + } + S_PATH_COMPONENT_CHARACTERS => { + if is_path_component_character(c) { + consumed = true; // consumed string character except slash + } else { + self.state = S_END; + } + } + S_INITIAL_SEPARATOR => { + if is_path_component_character(c) { + consumed = true; + self.state = S_PATH_COMPONENT_CHARACTERS; + } else if c.is_whitespace() { + self.state = S_END; + } else { + consumed = true; + } + } + _ => {} + } + } + consumed + } + + fn consume_char_whitespace(&mut self, c: char) -> bool { + // Consume a "word" of printable characters plus any leading whitespace. + const S_ALWAYS_ONE: u8 = 0; + const S_BLANK: u8 = 1; + const S_GRAPH: u8 = 2; + const S_END: u8 = 3; + + let mut consumed = false; + while self.state != S_END && !consumed { + match self.state { + S_ALWAYS_ONE => { + consumed = true; // always consume the first character + // If it's not whitespace, only consume those from here. + if !c.is_whitespace() { + self.state = S_GRAPH; + } else { + // If it's whitespace, keep consuming whitespace until the graphs. + self.state = S_BLANK; + } + } + S_BLANK => { + if c.is_whitespace() { + consumed = true; // consumed whitespace + } else { + self.state = S_GRAPH; + } + } + S_GRAPH => { + if !c.is_whitespace() { + consumed = true; // consumed printable non-space + } else { + self.state = S_END; + } + } + _ => {} + } + } + consumed + } +} + +fn is_path_component_character(c: char) -> bool { + tok_is_string_character(c, None) && !L!("/={,}'\":@").as_char_slice().contains(&c) +} + +/// The position of the equal sign in a variable assignment like foo=bar. +/// +/// Return the location of the equals sign, or none if the string does +/// not look like a variable assignment like FOO=bar. 
 The detection
+/// works similarly to some POSIX shells: only letters and numbers are
+/// allowed on the left hand side, no quotes or escaping.
+pub fn variable_assignment_equals_pos(txt: &wstr) -> Option {
+ let mut found_potential_variable = false;
+
+ // TODO bracket indexing
+ for (i, c) in txt.chars().enumerate() {
+ if !found_potential_variable {
+ if !valid_var_name_char(c as wchar_t) {
+ return None;
+ }
+ found_potential_variable = true;
+ } else {
+ if c == '=' {
+ return Some(i);
+ }
+ if !valid_var_name_char(c as wchar_t) {
+ return None;
+ }
+ }
+ }
+ None
+}
+
+fn variable_assignment_equals_pos_ffi(txt: &CxxWString) -> SharedPtr {
+ match variable_assignment_equals_pos(&txt.from_ffi()) {
+ Some(p) => SharedPtr::new(p),
+ None => SharedPtr::null(),
+ }
+}
diff --git a/fish-rust/src/topic_monitor.rs b/fish-rust/src/topic_monitor.rs
new file mode 100644
index 000000000..4ef936988
--- /dev/null
+++ b/fish-rust/src/topic_monitor.rs
@@ -0,0 +1,642 @@
+/*! Topic monitoring support.
+
+Topics are conceptually "a thing that can happen." For example,
+delivery of a SIGINT, a child process exits, etc. It is possible to post to a topic, which means
+that that thing happened.
+
+Associated with each topic is a current generation, which is a 64 bit value. When you query a
+topic, you get back a generation. If on the next query the generation has increased, then it
+indicates someone posted to the topic.
+
+For example, if you are monitoring a child process, you can query the sigchld topic. If it has
+increased since your last query, it is possible that your child process has exited.
+
+Topic postings may be coalesced. That is, there may be two posts to a given topic, yet the
+generation only increases by 1. The only guarantee is that after a topic post, the current
+generation value is larger than any value previously queried.
+
+Tying this all together is the topic_monitor_t. 
This provides the current topic generations, and +also provides the ability to perform a blocking wait for any topic to change in a particular topic +set. This is the real power of topics: you can wait for a sigchld signal OR a thread exit. +*/ + +use crate::fd_readable_set::fd_readable_set_t; +use crate::fds::{self, autoclose_pipes_t}; +use crate::ffi::{self as ffi, c_int}; +use crate::flog::FLOG; +use crate::wchar::{widestrs, wstr, WString}; +use crate::wchar_ffi::wcharz; +use nix::errno::Errno; +use nix::unistd; +use std::cell::UnsafeCell; +use std::mem; +use std::pin::Pin; +use std::sync::{ + atomic::{AtomicU8, Ordering}, + Condvar, Mutex, MutexGuard, +}; + +#[cxx::bridge] +mod topic_monitor_ffi { + /// Simple value type containing the values for a topic. + /// This should be kept in sync with topic_t. + #[derive(Default, Copy, Clone, Debug, PartialEq, Eq)] + struct generation_list_t { + pub sighupint: u64, + pub sigchld: u64, + pub internal_exit: u64, + } + + extern "Rust" { + fn invalid_generations() -> generation_list_t; + fn set_min_from(self: &mut generation_list_t, topic: topic_t, other: &generation_list_t); + fn at(self: &generation_list_t, topic: topic_t) -> u64; + fn at_mut(self: &mut generation_list_t, topic: topic_t) -> &mut u64; + //fn describe(self: &generation_list_t) -> UniquePtr; + } + + /// The list of topics which may be observed. + #[repr(u8)] + #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] + pub enum topic_t { + sighupint, // Corresponds to both SIGHUP and SIGINT signals. + sigchld, // Corresponds to SIGCHLD signal. + internal_exit, // Corresponds to an internal process exit. 
+ } + + extern "Rust" { + type topic_monitor_t; + fn new_topic_monitor() -> Box; + + fn topic_monitor_principal() -> &'static topic_monitor_t; + fn post(self: &topic_monitor_t, topic: topic_t); + fn current_generations(self: &topic_monitor_t) -> generation_list_t; + fn generation_for_topic(self: &topic_monitor_t, topic: topic_t) -> u64; + fn check(self: &topic_monitor_t, gens: *mut generation_list_t, wait: bool) -> bool; + } +} + +pub use topic_monitor_ffi::{generation_list_t, topic_t}; +pub type generation_t = u64; + +/// A generation value which indicates the topic is not of interest. +pub const invalid_generation: generation_t = std::u64::MAX; + +pub fn all_topics() -> [topic_t; 3] { + [topic_t::sighupint, topic_t::sigchld, topic_t::internal_exit] +} + +#[widestrs] +impl generation_list_t { + pub fn new() -> Self { + Self::default() + } + + fn describe(&self) -> WString { + let mut result = WString::new(); + for gen in self.as_array() { + if result.len() > 0 { + result.push(','); + } + if gen == invalid_generation { + result.push_str("-1"); + } else { + result.push_str(&gen.to_string()); + } + } + return result; + } + + /// \return the a mutable reference to the value for a topic. + pub fn at_mut(&mut self, topic: topic_t) -> &mut generation_t { + match topic { + topic_t::sighupint => &mut self.sighupint, + topic_t::sigchld => &mut self.sigchld, + topic_t::internal_exit => &mut self.internal_exit, + _ => panic!("invalid topic"), + } + } + + /// \return the value for a topic. + pub fn at(&self, topic: topic_t) -> generation_t { + match topic { + topic_t::sighupint => self.sighupint, + topic_t::sigchld => self.sigchld, + topic_t::internal_exit => self.internal_exit, + _ => panic!("invalid topic"), + } + } + + /// \return ourselves as an array. + pub fn as_array(&self) -> [generation_t; 3] { + [self.sighupint, self.sigchld, self.internal_exit] + } + + /// Set the value of \p topic to the smaller of our value and the value in \p other. 
+    pub fn set_min_from(&mut self, topic: topic_t, other: &generation_list_t) {
+        if self.at(topic) > other.at(topic) {
+            *self.at_mut(topic) = other.at(topic);
+        }
+    }
+
+    /// \return whether a topic is valid.
+    pub fn is_valid(&self, topic: topic_t) -> bool {
+        self.at(topic) != invalid_generation
+    }
+
+    /// \return whether any topic is valid.
+    pub fn any_valid(&self) -> bool {
+        let mut valid = false;
+        for gen in self.as_array() {
+            if gen != invalid_generation {
+                valid = true;
+            }
+        }
+        valid
+    }
+
+    /// Generation list containing invalid generations only.
+    pub fn invalids() -> generation_list_t {
+        generation_list_t {
+            sighupint: invalid_generation,
+            sigchld: invalid_generation,
+            internal_exit: invalid_generation,
+        }
+    }
+}
+
+/// CXX wrapper as it does not support member functions.
+pub fn invalid_generations() -> generation_list_t {
+    generation_list_t::invalids()
+}
+
+/// A simple binary semaphore.
+/// On systems that do not support unnamed semaphores (macOS in particular) this is built on top of
+/// a self-pipe. Note that post() must be async-signal safe.
+pub struct binary_semaphore_t {
+    // Whether our semaphore was successfully initialized.
+    sem_ok_: bool,
+
+    // The semaphore, if initialized.
+    // This is Box'd so it has a stable address.
+    sem_: Pin<Box<UnsafeCell<libc::sem_t>>>,
+
+    // Pipes used to emulate a semaphore, if not initialized.
+    pipes_: autoclose_pipes_t,
+}
+
+impl binary_semaphore_t {
+    pub fn new() -> binary_semaphore_t {
+        #[allow(unused_mut, unused_assignments)]
+        let mut sem_ok_ = false;
+        // sem_t does not have an initializer in Rust so we use zeroed().
+        #[allow(unused_mut)]
+        let mut sem_ = Pin::from(Box::new(UnsafeCell::new(unsafe { mem::zeroed() })));
+        let mut pipes_ = autoclose_pipes_t::default();
+        // sem_init always fails with ENOSYS on Mac and has an annoying deprecation warning.
+        // On BSD sem_init uses a file descriptor under the hood which doesn't get CLOEXEC (see #7304).
+        // So use fast semaphores on Linux only.
+ #[cfg(target_os = "linux")] + { + let res = unsafe { libc::sem_init(sem_.get(), 0, 0) }; + sem_ok_ = res == 0; + } + if !sem_ok_ { + let pipes = fds::make_autoclose_pipes(); + assert!(pipes.is_some(), "Failed to make pubsub pipes"); + pipes_ = pipes.unwrap(); + + // // Whoof. Thread Sanitizer swallows signals and replays them at its leisure, at the point + // // where instrumented code makes certain blocking calls. But tsan cannot interrupt a signal + // // call, so if we're blocked in read() (like the topic monitor wants to be!), we'll never + // // receive SIGCHLD and so deadlock. So if tsan is enabled, we mark our fd as non-blocking + // // (so reads will never block) and use select() to poll it. + if cfg!(feature = "FISH_TSAN_WORKAROUNDS") { + ffi::make_fd_nonblocking(c_int(pipes_.read.fd())); + } + } + binary_semaphore_t { + sem_ok_, + sem_, + pipes_, + } + } + + /// Release a waiting thread. + #[widestrs] + pub fn post(&self) { + // Beware, we are in a signal handler. + if self.sem_ok_ { + let res = unsafe { libc::sem_post(self.sem_.get()) }; + // sem_post is non-interruptible. + if res < 0 { + self.die("sem_post"L); + } + } else { + // Write exactly one byte. + let success; + loop { + let v: u8 = 0; + let ret = unistd::write(self.pipes_.write.fd(), std::slice::from_ref(&v)); + if ret.err() == Some(Errno::EINTR) { + continue; + } + success = ret.is_ok(); + break; + } + if !success { + self.die("write"L); + } + } + } + + /// Wait for a post. + /// This loops on EINTR. + #[widestrs] + pub fn wait(&self) { + if self.sem_ok_ { + let mut res; + loop { + res = unsafe { libc::sem_wait(self.sem_.get()) }; + if res < 0 && Errno::last() == Errno::EINTR { + continue; + } + break; + } + // Other errors here are very unexpected. + if res < 0 { + self.die("sem_wait"L); + } + } else { + let fd = self.pipes_.read.fd(); + // We must read exactly one byte. 
+ loop { + // Under tsan our notifying pipe is non-blocking, so we would busy-loop on the read() + // call until data is available (that is, fish would use 100% cpu while waiting for + // processes). This call prevents that. + if cfg!(feature = "FISH_TSAN_WORKAROUNDS") { + let _ = fd_readable_set_t::is_fd_readable(fd, fd_readable_set_t::kNoTimeout); + } + let mut ignored: u8 = 0; + let amt = unistd::read(fd, std::slice::from_mut(&mut ignored)); + if amt.ok() == Some(1) { + break; + } + // EAGAIN should only be returned in TSan case. + if amt.is_err() + && (amt.err() != Some(Errno::EINTR) && amt.err() != Some(Errno::EAGAIN)) + { + self.die("read"L); + } + } + } + } + + pub fn die(&self, msg: &wstr) { + ffi::wperror(wcharz!(msg)); + panic!("die"); + } +} + +impl Drop for binary_semaphore_t { + fn drop(&mut self) { + // We never use sem_t on Mac. The #ifdef avoids deprecation warnings. + #[cfg(target_os = "linux")] + { + if self.sem_ok_ { + _ = unsafe { libc::sem_destroy(self.sem_.get()) }; + } + } + } +} + +impl Default for binary_semaphore_t { + fn default() -> Self { + Self::new() + } +} + +/// The topic monitor class. This permits querying the current generation values for topics, +/// optionally blocking until they increase. +/// What we would like to write is that we have a set of topics, and threads wait for changes on a +/// condition variable which is tickled in post(). But this can't work because post() may be called +/// from a signal handler and condition variables are not async-signal safe. +/// So instead the signal handler announces changes via a binary semaphore. +/// In the wait case, what generally happens is: +/// A thread fetches the generations, see they have not changed, and then decides to try to wait. +/// It does so by atomically swapping in STATUS_NEEDS_WAKEUP to the status bits. +/// If that succeeds, it waits on the binary semaphore. The post() call will then wake the thread +/// up. 
If it failed, then either a post() call updated the status values (so perhaps there is a
+/// new topic post) or some other thread won the race and called wait() on the semaphore. Here our
+/// thread will wait on the data_notifier_ queue.
+type topic_bitmask_t = u8;
+
+fn topic_to_bit(t: topic_t) -> topic_bitmask_t {
+    1 << t.repr
+}
+
+// Some stuff that needs to be protected by the same lock.
+#[derive(Default)]
+struct data_t {
+    /// The current values.
+    current: generation_list_t,
+
+    /// A flag indicating that there is a current reader.
+    /// The 'reader' is responsible for calling sema_.wait().
+    has_reader: bool,
+}
+
+/// Sentinel status value indicating that a thread is waiting and needs a wakeup.
+/// Note it is an error for this bit to be set and also any topic bit.
+const STATUS_NEEDS_WAKEUP: u8 = 128;
+type status_bits_t = u8;
+
+#[derive(Default)]
+pub struct topic_monitor_t {
+    data_: Mutex<data_t>,
+
+    /// Condition variable for broadcasting notifications.
+    /// This is associated with data_'s mutex.
+    data_notifier_: Condvar,
+
+    /// A status value which describes our current state, managed via atomics.
+    /// Three possibilities:
+    /// 0: no changed topics, no thread is waiting.
+    /// 128: no changed topics, some thread is waiting and needs wakeup.
+    /// anything else: some changed topic, no thread is waiting.
+    /// Note that if the msb is set (status == 128) no other bit may be set.
+    status_: AtomicU8,
+
+    /// Binary semaphore used to communicate changes.
+    /// If status_ is STATUS_NEEDS_WAKEUP, then a thread has committed to call wait() on our sema and
+    /// this must be balanced by the next call to post(). Note only one thread may wait at a time.
+    sema_: binary_semaphore_t,
+}
+
+/// The principal topic monitor.
+/// Do not attempt to move this into a lazy_static, it must be accessed from a signal handler.
+static mut s_principal: *const topic_monitor_t = std::ptr::null();
+
+/// Create a new topic monitor. Exposed for the FFI.
+pub fn new_topic_monitor() -> Box { + Box::default() +} + +impl topic_monitor_t { + /// Initialize the principal monitor, and return it. + /// This should be called only on the main thread. + pub fn initialize() -> &'static Self { + unsafe { + if s_principal.is_null() { + // We simply leak. + s_principal = Box::into_raw(new_topic_monitor()); + } + &*s_principal + } + } + + pub fn post(&self, topic: topic_t) { + // Beware, we may be in a signal handler! + // Atomically update the pending topics. + let topicbit = topic_to_bit(topic); + const relaxed: Ordering = Ordering::Relaxed; + + // CAS in our bit, capturing the old status value. + let mut oldstatus: status_bits_t = 0; + let mut cas_success = false; + while !cas_success { + oldstatus = self.status_.load(relaxed); + // Clear wakeup bit and set our topic bit. + let mut newstatus = oldstatus; + newstatus &= !STATUS_NEEDS_WAKEUP; // note: bitwise not + newstatus |= topicbit; + cas_success = self + .status_ + .compare_exchange_weak(oldstatus, newstatus, relaxed, relaxed) + .is_ok(); + } + // Note that if the STATUS_NEEDS_WAKEUP bit is set, no other bits must be set. + assert!( + ((oldstatus == STATUS_NEEDS_WAKEUP) == ((oldstatus & STATUS_NEEDS_WAKEUP) != 0)), + "If STATUS_NEEDS_WAKEUP is set no other bits should be set" + ); + + // If the bit was already set, then someone else posted to this topic and nobody has reacted to + // it yet. In that case we're done. + if (oldstatus & topicbit) != 0 { + return; + } + + // We set a new bit. + // Check if we should wake up a thread because it was waiting. + if (oldstatus & STATUS_NEEDS_WAKEUP) != 0 { + std::sync::atomic::fence(Ordering::Release); + self.sema_.post(); + } + } + + /// Apply any pending updates to the data. + /// This accepts data because it must be locked. + /// \return the updated generation list. + fn updated_gens_in_data(&self, data: &mut MutexGuard) -> generation_list_t { + // Atomically acquire the pending updates, swapping in 0. 
+ // If there are no pending updates (likely) or a thread is waiting, just return. + // Otherwise CAS in 0 and update our topics. + const relaxed: Ordering = Ordering::Relaxed; + let mut changed_topic_bits: topic_bitmask_t = 0; + let mut cas_success = false; + while !cas_success { + changed_topic_bits = self.status_.load(relaxed); + if changed_topic_bits == 0 || changed_topic_bits == STATUS_NEEDS_WAKEUP { + return data.current; + } + cas_success = self + .status_ + .compare_exchange_weak(changed_topic_bits, 0, relaxed, relaxed) + .is_ok(); + } + assert!( + (changed_topic_bits & STATUS_NEEDS_WAKEUP) == 0, + "Thread waiting bit should not be set" + ); + + // Update the current generation with our topics and return it. + for topic in all_topics() { + if changed_topic_bits & topic_to_bit(topic) != 0 { + *data.current.at_mut(topic) += 1; + FLOG!( + topic_monitor, + "Updating topic", + topic, + "to", + data.current.at(topic) + ); + } + } + // Report our change. + self.data_notifier_.notify_all(); + return data.current; + } + + /// \return the current generation list, opportunistically applying any pending updates. + fn updated_gens(&self) -> generation_list_t { + let mut data = self.data_.lock().unwrap(); + return self.updated_gens_in_data(&mut data); + } + + /// Access the current generations. + pub fn current_generations(self: &topic_monitor_t) -> generation_list_t { + self.updated_gens() + } + + /// Access the generation for a topic. + pub fn generation_for_topic(self: &topic_monitor_t, topic: topic_t) -> generation_t { + self.current_generations().at(topic) + } + + /// Given a list of input generations, attempt to update them to something newer. + /// If \p gens is older, then just return those by reference, and directly return false (not + /// becoming the reader). + /// If \p gens is current and there is not a reader, then do not update \p gens and return true, + /// indicating we should become the reader. 
Now it is our responsibility to wait on the + /// semaphore and notify on a change via the condition variable. If \p gens is current, and + /// there is already a reader, then wait until the reader notifies us and try again. + fn try_update_gens_maybe_becoming_reader(&self, gens: &mut generation_list_t) -> bool { + let mut become_reader = false; + let mut data = self.data_.lock().unwrap(); + loop { + // See if the updated gen list has changed. If so we don't need to become the reader. + let current = self.updated_gens_in_data(&mut data); + // FLOG(topic_monitor, "TID", thread_id(), "local ", gens->describe(), ": current", + // current.describe()); + if *gens != current { + *gens = current; + break; + } + + // The generations haven't changed. Perhaps we become the reader. + // Note we still hold the lock, so this cannot race with any other thread becoming the + // reader. + if data.has_reader { + // We already have a reader, wait for it to notify us and loop again. + data = self.data_notifier_.wait(data).unwrap(); + continue; + } else { + // We will try to become the reader. + // Reader bit should not be set in this case. + assert!( + (self.status_.load(Ordering::Relaxed) & STATUS_NEEDS_WAKEUP) == 0, + "No thread should be waiting" + ); + // Try becoming the reader by marking the reader bit. + let expected_old: status_bits_t = 0; + if self + .status_ + .compare_exchange( + expected_old, + STATUS_NEEDS_WAKEUP, + Ordering::SeqCst, + Ordering::SeqCst, + ) + .is_err() + { + // We failed to become the reader, perhaps because another topic post just arrived. + // Loop again. + continue; + } + // We successfully did a CAS from 0 -> STATUS_NEEDS_WAKEUP. + // Now any successive topic post must signal us. + //FLOG(topic_monitor, "TID", thread_id(), "becoming reader"); + become_reader = true; + data.has_reader = true; + break; + } + } + return become_reader; + } + + /// Wait for some entry in the list of generations to change. + /// \return the new gens. 
+ fn await_gens(&self, input_gens: &generation_list_t) -> generation_list_t { + let mut gens = *input_gens; + while gens == *input_gens { + let become_reader = self.try_update_gens_maybe_becoming_reader(&mut gens); + if become_reader { + // Now we are the reader. Read from the pipe, and then update with any changes. + // Note we no longer hold the lock. + assert!( + gens == *input_gens, + "Generations should not have changed if we are the reader." + ); + + // Wait to be woken up. + self.sema_.wait(); + + // We are finished waiting. We must stop being the reader, and post on the condition + // variable to wake up any other threads waiting for us to finish reading. + let mut data = self.data_.lock().unwrap(); + gens = data.current; + // FLOG(topic_monitor, "TID", thread_id(), "local", input_gens.describe(), + // "read() complete, current is", gens.describe()); + assert!(data.has_reader, "We should be the reader"); + data.has_reader = false; + self.data_notifier_.notify_all(); + } + } + return gens; + } + + /// For each valid topic in \p gens, check to see if the current topic is larger than + /// the value in \p gens. + /// If \p wait is set, then wait if there are no changes; otherwise return immediately. + /// \return true if some topic changed, false if none did. + /// On a true return, this updates the generation list \p gens. + pub fn check(&self, gens: *mut generation_list_t, wait: bool) -> bool { + assert!(!gens.is_null(), "gens must not be null"); + let gens = unsafe { &mut *gens }; + if !gens.any_valid() { + return false; + } + + let mut current: generation_list_t = self.updated_gens(); + let mut changed = false; + loop { + // Load the topic list and see if anything has changed. 
+            for topic in all_topics() {
+                if gens.is_valid(topic) {
+                    assert!(
+                        gens.at(topic) <= current.at(topic),
+                        "Incoming gen count exceeded published count"
+                    );
+                    if gens.at(topic) < current.at(topic) {
+                        *gens.at_mut(topic) = current.at(topic);
+                        changed = true;
+                    }
+                }
+            }
+
+            // If we're not waiting, or something changed, then we're done.
+            if !wait || changed {
+                break;
+            }
+
+            // Wait until our gens change.
+            current = self.await_gens(&current);
+        }
+        return changed;
+    }
+}
+
+pub fn topic_monitor_init() {
+    topic_monitor_t::initialize();
+}
+
+pub fn topic_monitor_principal() -> &'static topic_monitor_t {
+    unsafe {
+        assert!(
+            !s_principal.is_null(),
+            "Principal topic monitor not initialized"
+        );
+        &*s_principal
+    }
+}
diff --git a/fish-rust/src/util.rs b/fish-rust/src/util.rs
new file mode 100644
index 000000000..f9c651b06
--- /dev/null
+++ b/fish-rust/src/util.rs
@@ -0,0 +1,311 @@
+//! Generic utilities library.
+
+use crate::ffi::wcharz_t;
+use crate::wchar::wstr;
+use std::cmp::Ordering;
+use std::time;
+
+#[cxx::bridge]
+mod ffi {
+    extern "C++" {
+        include!("wutil.h");
+        type wcharz_t = super::wcharz_t;
+    }
+
+    extern "Rust" {
+        #[cxx_name = "wcsfilecmp"]
+        fn wcsfilecmp_ffi(a: wcharz_t, b: wcharz_t) -> i32;
+        #[cxx_name = "wcsfilecmp_glob"]
+        fn wcsfilecmp_glob_ffi(a: wcharz_t, b: wcharz_t) -> i32;
+        fn get_time() -> i64;
+    }
+}
+
+fn ordering_to_int(ord: Ordering) -> i32 {
+    match ord {
+        Ordering::Less => -1,
+        Ordering::Equal => 0,
+        Ordering::Greater => 1,
+    }
+}
+
+fn wcsfilecmp_glob_ffi(a: wcharz_t, b: wcharz_t) -> i32 {
+    ordering_to_int(wcsfilecmp_glob(a.into(), b.into()))
+}
+
+fn wcsfilecmp_ffi(a: wcharz_t, b: wcharz_t) -> i32 {
+    ordering_to_int(wcsfilecmp(a.into(), b.into()))
+}
+
+/// Compares two wide character strings with an (arguably) intuitive ordering. This function tries
+/// to order strings in a way which is intuitive to humans with regards to sorting strings
+/// containing numbers.
+/// +/// Most sorting functions would sort the strings 'file1.txt' 'file5.txt' and 'file12.txt' as: +/// +/// file1.txt +/// file12.txt +/// file5.txt +/// +/// This function regards any sequence of digits as a single entity when performing comparisons, so +/// the output is instead: +/// +/// file1.txt +/// file5.txt +/// file12.txt +/// +/// Which most people would find more intuitive. +/// +/// This won't return the optimum results for numbers in bases higher than ten, such as hexadecimal, +/// but at least a stable sort order will result. +/// +/// This function performs a two-tiered sort, where difference in case and in number of leading +/// zeroes in numbers only have effect if no other differences between strings are found. This way, +/// a 'file1' and 'File1' will not be considered identical, and hence their internal sort order is +/// not arbitrary, but the names 'file1', 'File2' and 'file3' will still be sorted in the order +/// given above. +pub fn wcsfilecmp(a: &wstr, b: &wstr) -> Ordering { + let mut retval = Ordering::Equal; + let mut ai = 0; + let mut bi = 0; + while ai < a.len() && bi < b.len() { + let ac = a.as_char_slice()[ai]; + let bc = b.as_char_slice()[bi]; + if ac.is_ascii_digit() && bc.is_ascii_digit() { + let (ad, bd); + (retval, ad, bd) = wcsfilecmp_leading_digits(&a[ai..], &b[bi..]); + ai += ad; + bi += bd; + if retval != Ordering::Equal || ai == a.len() || bi == b.len() { + break; + } + continue; + } + + // Fast path: Skip towupper. + if ac == bc { + ai += 1; + bi += 1; + continue; + } + + // Sort dashes after Z - see #5634 + let mut acl = if ac == '-' { '[' } else { ac }; + let mut bcl = if bc == '-' { '[' } else { bc }; + // TODO Compare the tail (enabled by Rust's Unicode support). 
+ acl = acl.to_uppercase().next().unwrap(); + bcl = bcl.to_uppercase().next().unwrap(); + + match acl.cmp(&bcl) { + Ordering::Equal => { + ai += 1; + bi += 1; + } + o => { + retval = o; + break; + } + } + } + + if retval != Ordering::Equal { + return retval; // we already know the strings aren't logically equal + } + + if ai == a.len() { + if bi == b.len() { + // The strings are logically equal. They may or may not be the same length depending on + // whether numbers were present but that doesn't matter. Disambiguate strings that + // differ by letter case or length. We don't bother optimizing the case where the file + // names are literally identical because that won't occur given how this function is + // used. And even if it were to occur (due to being reused in some other context) it + // would be so rare that it isn't worth optimizing for. + a.cmp(b) + } else { + Ordering::Less // string a is a prefix of b and b is longer + } + } else { + assert!(bi == b.len()); + Ordering::Greater // string b is a prefix of a and a is longer + } +} + +/// wcsfilecmp, but frozen in time for glob usage. +pub fn wcsfilecmp_glob(a: &wstr, b: &wstr) -> Ordering { + let mut retval = Ordering::Equal; + let mut ai = 0; + let mut bi = 0; + while ai < a.len() && bi < b.len() { + let ac = a.as_char_slice()[ai]; + let bc = b.as_char_slice()[bi]; + if ac.is_ascii_digit() && bc.is_ascii_digit() { + let (ad, bd); + (retval, ad, bd) = wcsfilecmp_leading_digits(&a[ai..], &b[bi..]); + ai += ad; + bi += bd; + // If we know the strings aren't logically equal or we've reached the end of one or both + // strings we can stop iterating over the chars in each string. + if retval != Ordering::Equal || ai == a.len() || bi == b.len() { + break; + } + continue; + } + + // Fast path: Skip towlower. + if ac == bc { + ai += 1; + bi += 1; + continue; + } + + // TODO Compare the tail (enabled by Rust's Unicode support). 
+ let acl = ac.to_lowercase().next().unwrap(); + let bcl = bc.to_lowercase().next().unwrap(); + match acl.cmp(&bcl) { + Ordering::Equal => { + ai += 1; + bi += 1; + } + o => { + retval = o; + break; + } + } + } + + if retval != Ordering::Equal { + return retval; // we already know the strings aren't logically equal + } + + if ai == a.len() { + if bi == b.len() { + // The strings are logically equal. They may or may not be the same length depending on + // whether numbers were present but that doesn't matter. Disambiguate strings that + // differ by letter case or length. We don't bother optimizing the case where the file + // names are literally identical because that won't occur given how this function is + // used. And even if it were to occur (due to being reused in some other context) it + // would be so rare that it isn't worth optimizing for. + a.cmp(b) + } else { + Ordering::Less // string a is a prefix of b and b is longer + } + } else { + assert!(bi == b.len()); + Ordering::Greater // string b is a prefix of a and a is longer + } +} + +/// Get the current time in microseconds since Jan 1, 1970. +pub fn get_time() -> i64 { + match time::SystemTime::now().duration_since(time::UNIX_EPOCH) { + Ok(difference) => difference.as_micros() as i64, + Err(until_epoch) => -(until_epoch.duration().as_micros() as i64), + } +} + +// Compare the strings to see if they begin with an integer that can be compared and return the +// result of that comparison. +fn wcsfilecmp_leading_digits(a: &wstr, b: &wstr) -> (Ordering, usize, usize) { + // Ignore leading 0s. + let mut ai = a.as_char_slice().iter().take_while(|c| **c == '0').count(); + let mut bi = b.as_char_slice().iter().take_while(|c| **c == '0').count(); + + let mut ret = Ordering::Equal; + loop { + let ac = a.as_char_slice().get(ai).unwrap_or(&'\0'); + let bc = b.as_char_slice().get(bi).unwrap_or(&'\0'); + if ac.is_ascii_digit() && bc.is_ascii_digit() { + // We keep the cmp value for the + // first differing digit. 
+ // + // If the numbers have the same length, that's the value. + if ret == Ordering::Equal { + // Comparing the string value is the same as numerical + // for wchar_t digits! + ret = ac.cmp(bc); + } + } else { + // We don't have negative numbers and we only allow ints, + // and we have already skipped leading zeroes, + // so the longer number is larger automatically. + if ac.is_ascii_digit() { + ret = Ordering::Greater; + } + if bc.is_ascii_digit() { + ret = Ordering::Less; + } + break; + } + ai += 1; + bi += 1; + } + + // For historical reasons, we skip trailing whitespace + // like fish_wcstol does! + // This is used in sorting globs, and that's supposed to be stable. + ai += a + .as_char_slice() + .iter() + .skip(ai) + .take_while(|c| c.is_whitespace()) + .count(); + bi += b + .as_char_slice() + .iter() + .skip(bi) + .take_while(|c| c.is_whitespace()) + .count(); + (ret, ai, bi) +} + +/// Verify the behavior of the `wcsfilecmp()` function. +#[test] +fn test_wcsfilecmp() { + use crate::wchar::L; + + macro_rules! validate { + ($str1:expr, $str2:expr, $expected_rc:expr) => { + assert_eq!(wcsfilecmp(L!($str1), L!($str2)), $expected_rc) + }; + } + + // Not using L as suffix because the macro munges error locations. 
+ validate!("", "", Ordering::Equal); + validate!("", "def", Ordering::Less); + validate!("abc", "", Ordering::Greater); + validate!("abc", "def", Ordering::Less); + validate!("abc", "DEF", Ordering::Less); + validate!("DEF", "abc", Ordering::Greater); + validate!("abc", "abc", Ordering::Equal); + validate!("ABC", "ABC", Ordering::Equal); + validate!("AbC", "abc", Ordering::Less); + validate!("AbC", "ABC", Ordering::Greater); + validate!("def", "abc", Ordering::Greater); + validate!("1ghi", "1gHi", Ordering::Greater); + validate!("1ghi", "2ghi", Ordering::Less); + validate!("1ghi", "01ghi", Ordering::Greater); + validate!("1ghi", "02ghi", Ordering::Less); + validate!("01ghi", "1ghi", Ordering::Less); + validate!("1ghi", "002ghi", Ordering::Less); + validate!("002ghi", "1ghi", Ordering::Greater); + validate!("abc01def", "abc1def", Ordering::Less); + validate!("abc1def", "abc01def", Ordering::Greater); + validate!("abc12", "abc5", Ordering::Greater); + validate!("51abc", "050abc", Ordering::Greater); + validate!("abc5", "abc12", Ordering::Less); + validate!("5abc", "12ABC", Ordering::Less); + validate!("abc0789", "abc789", Ordering::Less); + validate!("abc0xA789", "abc0xA0789", Ordering::Greater); + validate!("abc002", "abc2", Ordering::Less); + validate!("abc002g", "abc002", Ordering::Greater); + validate!("abc002g", "abc02g", Ordering::Less); + validate!("abc002.txt", "abc02.txt", Ordering::Less); + validate!("abc005", "abc012", Ordering::Less); + validate!("abc02", "abc002", Ordering::Greater); + validate!("abc002.txt", "abc02.txt", Ordering::Less); + validate!("GHI1abc2.txt", "ghi1abc2.txt", Ordering::Less); + validate!("a0", "a00", Ordering::Less); + validate!("a00b", "a0b", Ordering::Less); + validate!("a0b", "a00b", Ordering::Greater); + validate!("a-b", "azb", Ordering::Greater); +} diff --git a/fish-rust/src/wchar.rs b/fish-rust/src/wchar.rs new file mode 100644 index 000000000..fd91fb6de --- /dev/null +++ b/fish-rust/src/wchar.rs @@ -0,0 +1,62 @@ +//! 
Support for wide strings. +//! +//! There are two wide string types that are commonly used: +//! - wstr: a string slice without a nul terminator. Like `&str` but wide chars. +//! - WString: an owning string without a nul terminator. Like `String` but wide chars. + +use crate::ffi; +pub use cxx::CxxWString; +pub use ffi::{wchar_t, wcharz_t}; +pub use widestring::utf32str; +pub use widestring::{Utf32Str as wstr, Utf32String as WString}; + +/// Creates a wstr string slice, like the "L" prefix of C++. +/// The result is of type wstr. +/// It is NOT nul-terminated. +macro_rules! L { + ($string:literal) => { + widestring::utf32str!($string) + }; +} +pub(crate) use L; + +/// A proc-macro for creating wide string literals using an L *suffix*. +/// Example usage: +/// ``` +/// #[widestrs] +/// pub fn func() { +/// let s = "hello"L; // type &'static wstr +/// } +/// ``` +/// Note: the resulting string is NOT nul-terminated. +pub use widestring_suffix::widestrs; + +/// Pull in our extensions. +pub use crate::wchar_ext::{CharPrefixSuffix, WExt}; + +// These are in the Unicode private-use range. We really shouldn't use this +// range but have little choice in the matter given how our lexer/parser works. +// We can't use non-characters for these two ranges because there are only 66 of +// them and we need at least 256 + 64. +// +// If sizeof(wchar_t)==4 we could avoid using private-use chars; however, that +// would result in fish having different behavior on machines with 16 versus 32 +// bit wchar_t. It's better that fish behave the same on both types of systems. +// +// Note: We don't use the highest 8 bit range (0xF800 - 0xF8FF) because we know +// of at least one use of a codepoint in that range: the Apple symbol (0xF8FF) +// on Mac OS X. See http://www.unicode.org/faq/private_use.html. +const ENCODE_DIRECT_BASE: u32 = 0xF600; +const ENCODE_DIRECT_END: u32 = ENCODE_DIRECT_BASE + 256; + +/// Encode a literal byte in a UTF-32 character. This is required for e.g. 
the echo builtin, whose +/// escape sequences can be used to construct raw byte sequences which are then interpreted as e.g. +/// UTF-8 by the terminal. If we were to interpret each of those bytes as a codepoint and encode it +/// as a UTF-32 character, printing them would result in several characters instead of one UTF-8 +/// character. +/// +/// See https://github.com/fish-shell/fish-shell/issues/1894. +pub fn wchar_literal_byte(byte: u8) -> char { + char::from_u32(ENCODE_DIRECT_BASE + u32::from(byte)) + .expect("private-use codepoint should be valid char") +} diff --git a/fish-rust/src/wchar_ext.rs b/fish-rust/src/wchar_ext.rs new file mode 100644 index 000000000..707a3da81 --- /dev/null +++ b/fish-rust/src/wchar_ext.rs @@ -0,0 +1,137 @@ +use crate::wchar::{wstr, WString}; +use widestring::utfstr::CharsUtf32; + +/// A thing that a wide string can start with or end with. +/// It must have a chars() method which returns a double-ended char iterator. +pub trait CharPrefixSuffix { + type Iter: DoubleEndedIterator; + fn chars(self) -> Self::Iter; +} + +impl CharPrefixSuffix for char { + type Iter = std::iter::Once; + fn chars(self) -> Self::Iter { + std::iter::once(self) + } +} + +impl<'a> CharPrefixSuffix for &'a str { + type Iter = std::str::Chars<'a>; + fn chars(self) -> Self::Iter { + str::chars(self) + } +} + +impl<'a> CharPrefixSuffix for &'a wstr { + type Iter = CharsUtf32<'a>; + fn chars(self) -> Self::Iter { + wstr::chars(self) + } +} + +impl<'a> CharPrefixSuffix for &'a WString { + type Iter = CharsUtf32<'a>; + fn chars(self) -> Self::Iter { + wstr::chars(self) + } +} + +/// \return true if \p prefix is a prefix of \p contents. +fn iter_prefixes_iter(prefix: Prefix, mut contents: Contents) -> bool +where + Prefix: Iterator, + Contents: Iterator, + Prefix::Item: PartialEq, +{ + for c1 in prefix { + match contents.next() { + Some(c2) if c1 == c2 => {} + _ => return false, + } + } + true +} + +/// Convenience functions for WString. 
+pub trait WExt { + /// Access the chars of a WString or wstr. + fn as_char_slice(&self) -> &[char]; + + /// \return the char at an index. + /// If the index is equal to the length, return '\0'. + /// If the index exceeds the length, then panic. + fn char_at(&self, index: usize) -> char { + let chars = self.as_char_slice(); + if index == chars.len() { + '\0' + } else { + chars[index] + } + } + + /// \return the index of the first occurrence of the given char, or None. + fn find_char(&self, c: char) -> Option { + self.as_char_slice().iter().position(|&x| x == c) + } + + /// \return whether we start with a given Prefix. + /// The Prefix can be a char, a &str, a &wstr, or a &WString. + fn starts_with(&self, prefix: Prefix) -> bool { + iter_prefixes_iter(prefix.chars(), self.as_char_slice().iter().copied()) + } + + /// \return whether we end with a given Suffix. + /// The Suffix can be a char, a &str, a &wstr, or a &WString. + fn ends_with(&self, suffix: Suffix) -> bool { + iter_prefixes_iter( + suffix.chars().rev(), + self.as_char_slice().iter().copied().rev(), + ) + } +} + +impl WExt for WString { + fn as_char_slice(&self) -> &[char] { + self.as_utfstr().as_char_slice() + } +} + +impl WExt for wstr { + fn as_char_slice(&self) -> &[char] { + wstr::as_char_slice(self) + } +} + +#[cfg(test)] +mod tests { + use super::WExt; + use crate::wchar::{WString, L}; + /// Write some tests. 
+ #[cfg(test)] + fn test_find_char() { + assert_eq!(Some(0), L!("abc").find_char('a')); + assert_eq!(Some(1), L!("abc").find_char('b')); + assert_eq!(None, L!("abc").find_char('X')); + assert_eq!(None, L!("").find_char('X')); + } + + #[cfg(test)] + fn test_prefix() { + assert!(L!("").starts_with(L!(""))); + assert!(L!("abc").starts_with(L!(""))); + assert!(L!("abc").starts_with('a')); + assert!(L!("abc").starts_with("ab")); + assert!(L!("abc").starts_with(L!("ab"))); + assert!(L!("abc").starts_with(&WString::from_str("abc"))); + } + + #[cfg(test)] + fn test_suffix() { + assert!(L!("").ends_with(L!(""))); + assert!(L!("abc").ends_with(L!(""))); + assert!(L!("abc").ends_with('c')); + assert!(L!("abc").ends_with("bc")); + assert!(L!("abc").ends_with(L!("bc"))); + assert!(L!("abc").ends_with(&WString::from_str("abc"))); + } +} diff --git a/fish-rust/src/wchar_ffi.rs b/fish-rust/src/wchar_ffi.rs new file mode 100644 index 000000000..cc00c1ea7 --- /dev/null +++ b/fish-rust/src/wchar_ffi.rs @@ -0,0 +1,145 @@ +//! Interfaces for various FFI string types. +//! +//! We have the following string types for FFI purposes: +//! - CxxWString: the Rust view of a C++ wstring. +//! - W0String: an owning string with a nul terminator. +//! - wcharz_t: a "newtyped" pointer to a nul-terminated string, implemented in C++. +//! This is useful for FFI boundaries, to work around autocxx limitations on pointers. + +use crate::ffi; +pub use cxx::CxxWString; +pub use ffi::{wchar_t, wcharz_t}; +use once_cell::sync::Lazy; +pub use widestring::U32CString as W0String; +pub use widestring::{u32cstr, utf32str}; +pub use widestring::{Utf32Str as wstr, Utf32String as WString}; + +/// \return the length of a nul-terminated raw string. +pub fn wcslen(str: *const wchar_t) -> usize { + assert!(!str.is_null(), "Null pointer"); + let mut len = 0; + unsafe { + while *str.offset(len) != 0 { + len += 1; + } + } + len as usize +} + +impl wcharz_t { + /// \return the chars of a wcharz_t. 
+ pub fn chars(&self) -> &[char] { + assert!(!self.str_.is_null(), "Null wcharz"); + let data = self.str_ as *const char; + let len = self.size(); + unsafe { std::slice::from_raw_parts(data, len) } + } +} + +/// Convert wcharz_t to an WString. +impl From<&wcharz_t> for WString { + fn from(wcharz: &wcharz_t) -> Self { + WString::from_chars(wcharz.chars()) + } +} + +/// Convert a wstr or WString to a W0String, which contains a nul-terminator. +/// This is useful for passing across FFI boundaries. +/// In general you don't need to use this directly - use the c_str macro below. +pub fn wstr_to_u32string>(str: Str) -> W0String { + W0String::from_ustr(str.as_ref()).expect("String contained intermediate NUL character") +} + +/// Convert a wstr to a nul-terminated pointer. +/// This needs to be a macro so we can create a temporary with the proper lifetime. +macro_rules! c_str { + ($string:expr) => { + crate::wchar_ffi::wstr_to_u32string($string) + .as_ucstr() + .as_ptr() + .cast::() + }; +} + +/// Convert a wstr to a wcharz_t. +macro_rules! wcharz { + ($string:expr) => { + crate::wchar::wcharz_t { + str_: crate::wchar_ffi::c_str!($string), + } + }; +} + +pub(crate) use c_str; +pub(crate) use wcharz; + +static EMPTY_WSTRING: Lazy> = + Lazy::new(|| cxx::CxxWString::create(&[])); + +/// \return a reference to a shared empty wstring. +pub fn empty_wstring() -> &'static cxx::CxxWString { + &EMPTY_WSTRING +} + +/// Implement Debug for wcharz_t. +impl std::fmt::Debug for wcharz_t { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.str_.is_null() { + write!(f, "((null))") + } else { + self.chars().fmt(f) + } + } +} + +/// Convert self to a CxxWString, in preparation for using over FFI. +/// We can't use "From" as WString is implemented in an external crate. +pub trait WCharToFFI { + fn to_ffi(&self) -> cxx::UniquePtr; +} + +/// WString may be converted to CxxWString. 
+impl WCharToFFI for WString { + fn to_ffi(&self) -> cxx::UniquePtr { + cxx::CxxWString::create(self.as_char_slice()) + } +} + +/// wstr (wide string slices) may be converted to CxxWString. +impl WCharToFFI for wstr { + fn to_ffi(&self) -> cxx::UniquePtr { + cxx::CxxWString::create(self.as_char_slice()) + } +} + +/// wcharz_t (wide char) may be converted to CxxWString. +impl WCharToFFI for wcharz_t { + fn to_ffi(&self) -> cxx::UniquePtr { + cxx::CxxWString::create(self.chars()) + } +} + +/// Convert from a CxxWString, in preparation for using over FFI. +pub trait WCharFromFFI { + /// Convert from a CxxWString for FFI purposes. + #[allow(clippy::wrong_self_convention)] + fn from_ffi(&self) -> Target; +} + +impl WCharFromFFI for cxx::CxxWString { + fn from_ffi(&self) -> WString { + WString::from_chars(self.as_chars()) + } +} + +impl WCharFromFFI for cxx::UniquePtr { + fn from_ffi(&self) -> WString { + WString::from_chars(self.as_chars()) + } +} + +impl WCharFromFFI for cxx::SharedPtr { + fn from_ffi(&self) -> WString { + WString::from_chars(self.as_chars()) + } +} diff --git a/fish-rust/src/wgetopt.rs b/fish-rust/src/wgetopt.rs new file mode 100644 index 000000000..f2e98405d --- /dev/null +++ b/fish-rust/src/wgetopt.rs @@ -0,0 +1,616 @@ +//! A version of the getopt library for use with wide character strings. +//! +//! Note wgetopter expects an mutable array of const strings. It modifies the order of the +//! strings, but not their contents. +/* Declarations for getopt. + Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc. + +This file is part of the GNU C Library. Its master source is NOT part of +the C library, however. The master source lives in /gd/gnu/lib. + +The GNU C Library is free software; you can redistribute it and/or +modify it under the terms of the GNU Library General Public License as +published by the Free Software Foundation; either version 2 of the +License, or (at your option) any later version. 
+ +The GNU C Library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Library General Public License for more details. + +You should have received a copy of the GNU Library General Public +License along with the GNU C Library; see the file COPYING.LIB. If +not, write to the Free Software Foundation, Inc., 675 Mass Ave, +Cambridge, MA 02139, USA. */ + +use crate::wchar::{utf32str, wstr, WExt}; + +/// Describe how to deal with options that follow non-option ARGV-elements. +/// +/// If the caller did not specify anything, the default is PERMUTE. +/// +/// REQUIRE_ORDER means don't recognize them as options; stop option processing when the first +/// non-option is seen. This is what Unix does. This mode of operation is selected by using `+' +/// as the first character of the list of option characters. +/// +/// PERMUTE is the default. We permute the contents of ARGV as we scan, so that eventually all +/// the non-options are at the end. This allows options to be given in any order, even with +/// programs that were not written to expect this. +/// +/// RETURN_IN_ORDER is an option available to programs that were written to expect options and +/// other ARGV-elements in any order and that care about the ordering of the two. We describe +/// each non-option ARGV-element as if it were the argument of an option with character code 1. +/// Using `-` as the first character of the list of option characters selects this mode of +/// operation. +/// +/// The special argument `--` forces an end of option-scanning regardless of the value of +/// `ordering`. In the case of RETURN_IN_ORDER, only `--` can cause `getopt` to return EOF with +/// `woptind` != ARGC. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(clippy::upper_case_acronyms)] +enum Ordering { + REQUIRE_ORDER, + PERMUTE, + RETURN_IN_ORDER, +} + +impl Default for Ordering { + fn default() -> Self { + Ordering::PERMUTE + } +} + +fn empty_wstr() -> &'static wstr { + Default::default() +} + +pub struct wgetopter_t<'opts, 'args, 'argarray> { + /// Argv. + argv: &'argarray mut [&'args wstr], + + /// For communication from `getopt` to the caller. When `getopt` finds an option that takes an + /// argument, the argument value is returned here. Also, when `ordering` is RETURN_IN_ORDER, each + /// non-option ARGV-element is returned here. + pub woptarg: Option<&'args wstr>, + + shortopts: &'opts wstr, + longopts: &'opts [woption<'opts>], + + /// The next char to be scanned in the option-element in which the last option character we + /// returned was found. This allows us to pick up the scan where we left off. + /// + /// If this is empty, it means resume the scan by advancing to the next ARGV-element. + nextchar: &'args wstr, + + /// Index in ARGV of the next element to be scanned. This is used for communication to and from + /// the caller and for communication between successive calls to `getopt`. + /// + /// On entry to `getopt`, zero means this is the first call; initialize. + /// + /// When `getopt` returns EOF, this is the index of the first of the non-option elements that the + /// caller should itself scan. + /// + /// Otherwise, `woptind` communicates from one call to the next how much of ARGV has been scanned + /// so far. + // XXX 1003.2 says this must be 1 before any call. + pub woptind: usize, + + /// Set to an option character which was unrecognized. + woptopt: char, + + /// Describe how to deal with options that follow non-option ARGV-elements. + ordering: Ordering, + + /// Handle permutation of arguments. + /// + /// Describe the part of ARGV that contains non-options that have been skipped. 
`first_nonopt` + /// is the index in ARGV of the first of them; `last_nonopt` is the index after the last of them. + pub first_nonopt: usize, + pub last_nonopt: usize, + + missing_arg_return_colon: bool, + initialized: bool, +} + +/// Names for the values of the `has_arg` field of `woption`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum woption_argument_t { + no_argument, + required_argument, + optional_argument, +} + +/// Describe the long-named options requested by the application. The LONG_OPTIONS argument to +/// getopt_long or getopt_long_only is a vector of `struct option' terminated by an element +/// containing a name which is zero. +/// +/// The field `has_arg` is: +/// no_argument (or 0) if the option does not take an argument, +/// required_argument (or 1) if the option requires an argument, +/// optional_argument (or 2) if the option takes an optional argument. +/// +/// If the field `flag` is not NULL, it points to a variable that is set to the value given in the +/// field `val` when the option is found, but left unchanged if the option is not found. +/// +/// To have a long-named option do something other than set an `int` to a compiled-in constant, such +/// as set a value from `optarg`, set the option's `flag` field to zero and its `val` field to a +/// nonzero value (the equivalent single-letter option character, if there is one). For long +/// options that have a zero `flag` field, `getopt` returns the contents of the `val` field. +#[derive(Debug, Clone, Copy)] +pub struct woption<'a> { + /// Long name for switch. + pub name: &'a wstr, + + pub has_arg: woption_argument_t, + + /// If \c flag is non-null, this is the value that flag will be set to. Otherwise, this is the + /// return-value of the function call. + pub val: char, +} + +/// Helper function to create a woption. 
+pub const fn wopt(name: &wstr, has_arg: woption_argument_t, val: char) -> woption<'_> { + woption { name, has_arg, val } +} + +impl<'opts, 'args, 'argarray> wgetopter_t<'opts, 'args, 'argarray> { + pub fn new( + shortopts: &'opts wstr, + longopts: &'opts [woption], + argv: &'argarray mut [&'args wstr], + ) -> Self { + return wgetopter_t { + woptopt: '?', + argv, + shortopts, + longopts, + first_nonopt: 0, + initialized: false, + last_nonopt: 0, + missing_arg_return_colon: false, + nextchar: Default::default(), + ordering: Ordering::PERMUTE, + woptarg: None, + woptind: 0, + }; + } + + pub fn wgetopt_long(&mut self) -> Option { + assert!(self.woptind <= self.argc(), "woptind is out of range"); + let mut ignored = 0; + return self._wgetopt_internal(&mut ignored, false); + } + + pub fn wgetopt_long_idx(&mut self, opt_index: &mut usize) -> Option { + return self._wgetopt_internal(opt_index, false); + } + + /// \return the number of arguments. + fn argc(&self) -> usize { + return self.argv.len(); + } + + /// Exchange two adjacent subsequences of ARGV. One subsequence is elements + /// [first_nonopt,last_nonopt) which contains all the non-options that have been skipped so far. The + /// other is elements [last_nonopt,woptind), which contains all the options processed since those + /// non-options were skipped. + /// + /// `first_nonopt` and `last_nonopt` are relocated so that they describe the new indices of the + /// non-options in ARGV after they are moved. + fn exchange(&mut self) { + let mut bottom = self.first_nonopt; + let middle = self.last_nonopt; + let mut top = self.woptind; + + // Exchange the shorter segment with the far end of the longer segment. That puts the shorter + // segment into the right place. It leaves the longer segment in the right place overall, but it + // consists of two parts that need to be swapped next. + while top > middle && middle > bottom { + if top - middle > middle - bottom { + // Bottom segment is the short one. 
+ let len = middle - bottom; + + // Swap it with the top part of the top segment. + for i in 0..len { + self.argv.swap(bottom + i, top - (middle - bottom) + i); + } + // Exclude the moved bottom segment from further swapping. + top -= len; + } else { + // Top segment is the short one. + let len = top - middle; + + // Swap it with the bottom part of the bottom segment. + for i in 0..len { + self.argv.swap(bottom + i, middle + i); + } + // Exclude the moved top segment from further swapping. + bottom += len; + } + } + + // Update records for the slots the non-options now occupy. + self.first_nonopt += self.woptind - self.last_nonopt; + self.last_nonopt = self.woptind; + } + + /// Initialize the internal data when the first call is made. + fn _wgetopt_initialize(&mut self) { + // Start processing options with ARGV-element 1 (since ARGV-element 0 is the program name); the + // sequence of previously skipped non-option ARGV-elements is empty. + self.first_nonopt = 1; + self.last_nonopt = 1; + self.woptind = 1; + self.nextchar = empty_wstr(); + + let mut optstring = self.shortopts; + + // Determine how to handle the ordering of options and nonoptions. + if optstring.char_at(0) == '-' { + self.ordering = Ordering::RETURN_IN_ORDER; + optstring = &optstring[1..]; + } else if optstring.char_at(0) == '+' { + self.ordering = Ordering::REQUIRE_ORDER; + optstring = &optstring[1..]; + } else { + self.ordering = Ordering::PERMUTE; + } + + if optstring.char_at(0) == ':' { + self.missing_arg_return_colon = true; + optstring = &optstring[1..]; + } + + self.shortopts = optstring; + self.initialized = true; + } + + /// Advance to the next ARGV-element. + /// \return Some(\0) on success, or None or another value if we should stop. + fn _advance_to_next_argv(&mut self) -> Option { + let argc = self.argc(); + if self.ordering == Ordering::PERMUTE { + // If we have just processed some options following some non-options, exchange them so + // that the options come first. 
+ if self.first_nonopt != self.last_nonopt && self.last_nonopt != self.woptind { + self.exchange(); + } else if self.last_nonopt != self.woptind { + self.first_nonopt = self.woptind; + } + + // Skip any additional non-options and extend the range of non-options previously + // skipped. + while self.woptind < argc + && (self.argv[self.woptind].char_at(0) != '-' || self.argv[self.woptind].len() == 1) + { + self.woptind += 1; + } + self.last_nonopt = self.woptind; + } + + // The special ARGV-element `--' means premature end of options. Skip it like a null option, + // then exchange with previous non-options as if it were an option, then skip everything + // else like a non-option. + if self.woptind != argc && self.argv[self.woptind] == "--" { + self.woptind += 1; + + if self.first_nonopt != self.last_nonopt && self.last_nonopt != self.woptind { + self.exchange(); + } else if self.first_nonopt == self.last_nonopt { + self.first_nonopt = self.woptind; + } + self.last_nonopt = argc; + self.woptind = argc; + } + + // If we have done all the ARGV-elements, stop the scan and back over any non-options that + // we skipped and permuted. + + if self.woptind == argc { + // Set the next-arg-index to point at the non-options that we previously skipped, so the + // caller will digest them. + if self.first_nonopt != self.last_nonopt { + self.woptind = self.first_nonopt; + } + return None; + } + + // If we have come to a non-option and did not permute it, either stop the scan or describe + // it to the caller and pass it by. + if self.argv[self.woptind].char_at(0) != '-' || self.argv[self.woptind].len() == 1 { + if self.ordering == Ordering::REQUIRE_ORDER { + return None; + } + self.woptarg = Some(self.argv[self.woptind]); + self.woptind += 1; + return Some(char::from(1)); + } + + // We have found another option-ARGV-element. Skip the initial punctuation. 
+ let skip = if !self.longopts.is_empty() && self.argv[self.woptind].char_at(1) == '-' { + 2 + } else { + 1 + }; + self.nextchar = self.argv[self.woptind][skip..].into(); + return Some(char::from(0)); + } + + /// Check for a matching short opt. + fn _handle_short_opt(&mut self) -> char { + // Look at and handle the next short option-character. + let mut c = self.nextchar.char_at(0); + self.nextchar = &self.nextchar[1..]; + + let temp = match self.shortopts.chars().position(|sc| sc == c) { + Some(pos) => &self.shortopts[pos..], + None => utf32str!(""), + }; + + // Increment `woptind' when we start to process its last character. + if self.nextchar.is_empty() { + self.woptind += 1; + } + + if temp.is_empty() || c == ':' { + self.woptopt = c; + + if !self.nextchar.is_empty() { + self.woptind += 1; + } + return '?'; + } + + if temp.char_at(1) != ':' { + return c; + } + + if temp.char_at(2) == ':' { + // This is an option that accepts an argument optionally. + if !self.nextchar.is_empty() { + self.woptarg = Some(self.nextchar); + self.woptind += 1; + } else { + self.woptarg = None; + } + self.nextchar = empty_wstr(); + } else { + // This is an option that requires an argument. + if !self.nextchar.is_empty() { + self.woptarg = Some(self.nextchar); + // If we end this ARGV-element by taking the rest as an arg, we must advance to + // the next element now. + self.woptind += 1; + } else if self.woptind == self.argc() { + self.woptopt = c; + c = if self.missing_arg_return_colon { + ':' + } else { + '?' + }; + } else { + // We already incremented `woptind' once; increment it again when taking next + // ARGV-elt as argument. 
+ self.woptarg = Some(self.argv[self.woptind]); + self.woptind += 1; + } + self.nextchar = empty_wstr(); + } + + return c; + } + + fn _update_long_opt( + &mut self, + pfound: &woption, + nameend: usize, + longind: &mut usize, + option_index: usize, + retval: &mut char, + ) { + self.woptind += 1; + assert!(self.nextchar.char_at(nameend) == '\0' || self.nextchar.char_at(nameend) == '='); + if self.nextchar.char_at(nameend) == '=' { + if pfound.has_arg != woption_argument_t::no_argument { + self.woptarg = Some(self.nextchar[(nameend + 1)..].into()); + } else { + self.nextchar = empty_wstr(); + *retval = '?'; + return; + } + } else if pfound.has_arg == woption_argument_t::required_argument { + if self.woptind < self.argc() { + self.woptarg = Some(self.argv[self.woptind]); + self.woptind += 1; + } else { + self.nextchar = empty_wstr(); + *retval = if self.missing_arg_return_colon { + ':' + } else { + '?' + }; + return; + } + } + + self.nextchar = empty_wstr(); + *longind = option_index; + *retval = pfound.val; + } + + /// Find a matching long opt. + fn _find_matching_long_opt( + &self, + nameend: usize, + exact: &mut bool, + ambig: &mut bool, + indfound: &mut usize, + ) -> Option> { + let mut pfound: Option = None; + + // Test all long options for either exact match or abbreviated matches. + for (option_index, p) in self.longopts.iter().enumerate() { + // Check if current option is prefix of long opt + if p.name.starts_with(&self.nextchar[..nameend]) { + if nameend == p.name.len() { + // The current option is exact match of this long option + pfound = Some(*p); + *indfound = option_index; + *exact = true; + break; + } else if pfound.is_none() { + // current option is first prefix match but not exact match + pfound = Some(*p); + *indfound = option_index; + } else { + // current option is second or later prefix match but not exact match + *ambig = true; + } + } + } + return pfound; + } + + /// Check for a matching long opt. 
+ fn _handle_long_opt( + &mut self, + longind: &mut usize, + long_only: bool, + retval: &mut char, + ) -> bool { + let mut exact = false; + let mut ambig = false; + let mut indfound: usize = 0; + + let mut nameend = 0; + while self.nextchar.char_at(nameend) != '\0' && self.nextchar.char_at(nameend) != '=' { + nameend += 1; + } + + let pfound = self._find_matching_long_opt(nameend, &mut exact, &mut ambig, &mut indfound); + + if ambig && !exact { + self.nextchar = empty_wstr(); + self.woptind += 1; + *retval = '?'; + return true; + } + + if let Some(pfound) = pfound { + self._update_long_opt(&pfound, nameend, longind, indfound, retval); + return true; + } + + // Can't find it as a long option. If this is not getopt_long_only, or the option starts + // with '--' or is not a valid short option, then it's an error. Otherwise interpret it as a + // short option. + if !long_only + || self.argv[self.woptind].char_at(1) == '-' + || !self + .shortopts + .as_char_slice() + .contains(&self.nextchar.char_at(0)) + { + self.nextchar = empty_wstr(); + self.woptind += 1; + *retval = '?'; + return true; + } + + return false; + } + + /// Scan elements of ARGV (whose length is ARGC) for option characters given in OPTSTRING. + /// + /// If an element of ARGV starts with '-', and is not exactly "-" or "--", then it is an option + /// element. The characters of this element (aside from the initial '-') are option characters. If + /// `getopt` is called repeatedly, it returns successively each of the option characters from each of + /// the option elements. + /// + /// If `getopt` finds another option character, it returns that character, updating `woptind` and + /// `nextchar` so that the next call to `getopt` can resume the scan with the following option + /// character or ARGV-element. + /// + /// If there are no more option characters, `getopt` returns `EOF`. Then `woptind` is the index in + /// ARGV of the first ARGV-element that is not an option. 
(The ARGV-elements have been permuted so + /// that those that are not options now come last.) + /// + /// OPTSTRING is a string containing the legitimate option characters. If an option character is seen + /// that is not listed in OPTSTRING, return '?'. + /// + /// If a char in OPTSTRING is followed by a colon, that means it wants an arg, so the following text + /// in the same ARGV-element, or the text of the following ARGV-element, is returned in `optarg`. + /// Two colons mean an option that wants an optional arg; if there is text in the current + /// ARGV-element, it is returned in `w.woptarg`, otherwise `w.woptarg` is set to zero. + /// + /// If OPTSTRING starts with `-` or `+', it requests different methods of handling the non-option + /// ARGV-elements. See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + /// + /// Long-named options begin with `--` instead of `-`. Their names may be abbreviated as long as the + /// abbreviation is unique or is an exact match for some defined option. If they have an argument, + /// it follows the option name in the same ARGV-element, separated from the option name by a `=', or + /// else the in next ARGV-element. When `getopt` finds a long-named option, it returns 0 if that + /// option's `flag` field is nonzero, the value of the option's `val` field if the `flag` field is + /// zero. + /// + /// LONGOPTS is a vector of `struct option' terminated by an element containing a name which is zero. + /// + /// LONGIND returns the index in LONGOPT of the long-named option found. It is only valid when a + /// long-named option has been found by the most recent call. + /// + /// If LONG_ONLY is nonzero, '-' as well as '--' can introduce long-named options. 
+ fn _wgetopt_internal(&mut self, longind: &mut usize, long_only: bool) -> Option { + if !self.initialized { + self._wgetopt_initialize(); + } + self.woptarg = None; + + if self.nextchar.is_empty() { + let narg = self._advance_to_next_argv(); + if narg != Some(char::from(0)) { + return narg; + } + } + + // Decode the current option-ARGV-element. + + // Check whether the ARGV-element is a long option. + // + // If long_only and the ARGV-element has the form "-f", where f is a valid short option, don't + // consider it an abbreviated form of a long option that starts with f. Otherwise there would + // be no way to give the -f short option. + // + // On the other hand, if there's a long option "fubar" and the ARGV-element is "-fu", do + // consider that an abbreviation of the long option, just like "--fu", and not "-f" with arg + // "u". + // + // This distinction seems to be the most useful approach. + if !self.longopts.is_empty() && self.woptind < self.argc() { + let arg = self.argv[self.woptind]; + + #[allow(clippy::if_same_then_else)] + #[allow(clippy::needless_bool)] + let try_long = if arg.char_at(0) == '-' && arg.char_at(1) == '-' { + // Like --foo + true + } else if long_only && arg.len() >= 3 { + // Like -fu + true + } else if !self.shortopts.as_char_slice().contains(&arg.char_at(1)) { + // Like -f, but f is not a short arg. 
+ true + } else { + false + }; + + if try_long { + let mut retval = '\0'; + if self._handle_long_opt(longind, long_only, &mut retval) { + return Some(retval); + } + } + } + + return Some(self._handle_short_opt()); + } +} diff --git a/fish-rust/src/wutil/format/format.rs b/fish-rust/src/wutil/format/format.rs new file mode 100644 index 000000000..bab7bcb92 --- /dev/null +++ b/fish-rust/src/wutil/format/format.rs @@ -0,0 +1,516 @@ +// Adapted from https://github.com/tjol/sprintf-rs +// License follows: +// +// Copyright (c) 2021 Thomas Jollans +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is furnished +// to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +// OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +use std::convert::{TryFrom, TryInto}; + +use super::parser::{ConversionSpecifier, ConversionType, NumericParam}; +use super::printf::{PrintfError, Result}; +use crate::wchar::{wstr, WExt, WString, L}; + +/// Trait for types that can be formatted using printf strings +/// +/// Implemented for the basic types and shouldn't need implementing for +/// anything else. +pub trait Printf { + /// Format `self` based on the conversion configured in `spec`. + fn format(&self, spec: &ConversionSpecifier) -> Result; + /// Get `self` as an integer for use as a field width, if possible. + /// Defaults to None. + fn as_int(&self) -> Option { + None + } +} + +impl Printf for u64 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + let mut base = 10; + let mut digits: Vec = "0123456789".chars().collect(); + let mut alt_prefix = L!(""); + match spec.conversion_type { + ConversionType::DecInt => {} + ConversionType::HexIntLower => { + base = 16; + digits = "0123456789abcdef".chars().collect(); + alt_prefix = L!("0x"); + } + ConversionType::HexIntUpper => { + base = 16; + digits = "0123456789ABCDEF".chars().collect(); + alt_prefix = L!("0X"); + } + ConversionType::OctInt => { + base = 8; + digits = "01234567".chars().collect(); + alt_prefix = L!("0"); + } + _ => { + return Err(PrintfError::WrongType); + } + } + let prefix = if spec.alt_form { + alt_prefix.to_owned() + } else { + WString::new() + }; + + // Build the actual number (in reverse) + let mut rev_num = WString::new(); + let mut n = *self; + while n > 0 { + let digit = n % base; + n /= base; + rev_num.push(digits[digit as usize]); + } + if rev_num.is_empty() { + rev_num.push('0'); + } + + // Take care of padding + let width: usize = match spec.width { + NumericParam::Literal(w) => w, + _ => { + return Err(PrintfError::Unknown); // should not happen at this point!! 
+ } + } + .try_into() + .unwrap_or_default(); + let formatted = if spec.left_adj { + let mut num_str = prefix; + num_str.extend(rev_num.chars().rev()); + while num_str.len() < width { + num_str.push(' '); + } + num_str + } else if spec.zero_pad { + while prefix.len() + rev_num.len() < width { + rev_num.push('0'); + } + let mut num_str = prefix; + num_str.extend(rev_num.chars().rev()); + num_str + } else { + let mut num_str = prefix; + num_str.extend(rev_num.chars().rev()); + while num_str.len() < width { + num_str.insert(0, ' '); + } + num_str + }; + + Ok(formatted) + } + fn as_int(&self) -> Option { + i32::try_from(*self).ok() + } +} + +impl Printf for i64 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + match spec.conversion_type { + // signed integer format + ConversionType::DecInt => { + // do I need a sign prefix? + let negative = *self < 0; + let abs_val = self.abs(); + let sign_prefix: &wstr = if negative { + L!("-") + } else if spec.force_sign { + L!("+") + } else if spec.space_sign { + L!(" ") + } else { + L!("") + }; + let mut mod_spec = *spec; + mod_spec.width = match spec.width { + NumericParam::Literal(w) => NumericParam::Literal(w - sign_prefix.len() as i32), + _ => { + return Err(PrintfError::Unknown); + } + }; + + let formatted = (abs_val as u64).format(&mod_spec)?; + // put the sign a after any leading spaces + let mut actual_number = &formatted[0..]; + let mut leading_spaces = &formatted[0..0]; + if let Some(first_non_space) = formatted.chars().position(|c| c != ' ') { + actual_number = &formatted[first_non_space..]; + leading_spaces = &formatted[0..first_non_space]; + } + Ok(leading_spaces.to_owned() + sign_prefix + actual_number) + } + // unsigned-only formats + ConversionType::HexIntLower | ConversionType::HexIntUpper | ConversionType::OctInt => { + (*self as u64).format(spec) + } + _ => Err(PrintfError::WrongType), + } + } + fn as_int(&self) -> Option { + i32::try_from(*self).ok() + } +} + +impl Printf for i32 { + fn 
format(&self, spec: &ConversionSpecifier) -> Result { + match spec.conversion_type { + // signed integer format + ConversionType::DecInt => (*self as i64).format(spec), + // unsigned-only formats + ConversionType::HexIntLower | ConversionType::HexIntUpper | ConversionType::OctInt => { + (*self as u32).format(spec) + } + _ => Err(PrintfError::WrongType), + } + } + fn as_int(&self) -> Option { + Some(*self) + } +} + +impl Printf for u32 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + (*self as u64).format(spec) + } + fn as_int(&self) -> Option { + i32::try_from(*self).ok() + } +} + +impl Printf for i16 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + match spec.conversion_type { + // signed integer format + ConversionType::DecInt => (*self as i64).format(spec), + // unsigned-only formats + ConversionType::HexIntLower | ConversionType::HexIntUpper | ConversionType::OctInt => { + (*self as u16).format(spec) + } + _ => Err(PrintfError::WrongType), + } + } + fn as_int(&self) -> Option { + Some(*self as i32) + } +} + +impl Printf for u16 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + (*self as u64).format(spec) + } + fn as_int(&self) -> Option { + Some(*self as i32) + } +} + +impl Printf for i8 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + match spec.conversion_type { + // signed integer format + ConversionType::DecInt => (*self as i64).format(spec), + // unsigned-only formats + ConversionType::HexIntLower | ConversionType::HexIntUpper | ConversionType::OctInt => { + (*self as u8).format(spec) + } + _ => Err(PrintfError::WrongType), + } + } + fn as_int(&self) -> Option { + Some(*self as i32) + } +} + +impl Printf for u8 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + (*self as u64).format(spec) + } + fn as_int(&self) -> Option { + Some(*self as i32) + } +} + +impl Printf for usize { + fn format(&self, spec: &ConversionSpecifier) -> Result { + (*self as u64).format(spec) + } + fn as_int(&self) -> 
Option { + i32::try_from(*self).ok() + } +} + +impl Printf for isize { + fn format(&self, spec: &ConversionSpecifier) -> Result { + (*self as u64).format(spec) + } + fn as_int(&self) -> Option { + i32::try_from(*self).ok() + } +} + +impl Printf for f64 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + let mut prefix = WString::new(); + let mut number = WString::new(); + + // set up the sign + if self.is_sign_negative() { + prefix.push('-'); + } else if spec.space_sign { + prefix.push(' '); + } else if spec.force_sign { + prefix.push('+'); + } + + if self.is_finite() { + let mut use_scientific = false; + let mut exp_symb = 'e'; + let mut strip_trailing_0s = false; + let mut abs = self.abs(); + let mut exponent = abs.log10().floor() as i32; + let mut precision = match spec.precision { + NumericParam::Literal(p) => p, + _ => { + return Err(PrintfError::Unknown); + } + }; + if precision <= 0 { + precision = 0; + } + match spec.conversion_type { + ConversionType::DecFloatLower | ConversionType::DecFloatUpper => { + // default + } + ConversionType::SciFloatLower => { + use_scientific = true; + } + ConversionType::SciFloatUpper => { + use_scientific = true; + exp_symb = 'E'; + } + ConversionType::CompactFloatLower | ConversionType::CompactFloatUpper => { + if spec.conversion_type == ConversionType::CompactFloatUpper { + exp_symb = 'E' + } + strip_trailing_0s = true; + if precision == 0 { + precision = 1; + } + // exponent signifies significant digits - we must round now + // to (re)calculate the exponent + let rounding_factor = 10.0_f64.powf((precision - 1 - exponent) as f64); + let rounded_fixed = (abs * rounding_factor).round(); + abs = rounded_fixed / rounding_factor; + exponent = abs.log10().floor() as i32; + if exponent < -4 || exponent >= precision { + use_scientific = true; + precision -= 1; + } else { + // precision specifies the number of significant digits + precision -= 1 + exponent; + } + } + _ => { + return Err(PrintfError::WrongType); + } + } + 
+ if use_scientific { + let mut normal = abs / 10.0_f64.powf(exponent as f64); + + if precision > 0 { + let mut int_part = normal.trunc(); + let mut exp_factor = 10.0_f64.powf(precision as f64); + let mut tail = ((normal - int_part) * exp_factor).round() as u64; + while tail >= exp_factor as u64 { + // Overflow, must round + int_part += 1.0; + tail -= exp_factor as u64; + if int_part >= 10.0 { + // keep same precision - which means changing exponent + exponent += 1; + exp_factor /= 10.0; + normal /= 10.0; + int_part = normal.trunc(); + tail = ((normal - int_part) * exp_factor).round() as u64; + } + } + + let mut rev_tail_str = WString::new(); + for _ in 0..precision { + rev_tail_str.push((b'0' + (tail % 10) as u8) as char); + tail /= 10; + } + number.push_str(&int_part.to_string()); + number.push('.'); + number.extend(rev_tail_str.chars().rev()); + if strip_trailing_0s { + while number.ends_with('0') { + number.pop(); + } + } + } else { + number.push_str(&format!("{}", normal.round())); + } + number.push(exp_symb); + number.push_str(&format!("{exponent:+03}")); + } else if precision > 0 { + let mut int_part = abs.trunc(); + let exp_factor = 10.0_f64.powf(precision as f64); + let mut tail = ((abs - int_part) * exp_factor).round() as u64; + let mut rev_tail_str = WString::new(); + if tail >= exp_factor as u64 { + // overflow - we must round up + int_part += 1.0; + tail -= exp_factor as u64; + // no need to change the exponent as we don't have one + // (not scientific notation) + } + for _ in 0..precision { + rev_tail_str.push((b'0' + (tail % 10) as u8) as char); + tail /= 10; + } + number.push_str(&int_part.to_string()); + number.push('.'); + number.extend(rev_tail_str.chars().rev()); + if strip_trailing_0s { + while number.ends_with('0') { + number.pop(); + } + } + } else { + number.push_str(&format!("{}", abs.round())); + } + } else { + // not finite + match spec.conversion_type { + ConversionType::DecFloatLower + | ConversionType::SciFloatLower + | 
ConversionType::CompactFloatLower => { + if self.is_infinite() { + number.push_str("inf") + } else { + number.push_str("nan") + } + } + ConversionType::DecFloatUpper + | ConversionType::SciFloatUpper + | ConversionType::CompactFloatUpper => { + if self.is_infinite() { + number.push_str("INF") + } else { + number.push_str("NAN") + } + } + _ => { + return Err(PrintfError::WrongType); + } + } + } + // Take care of padding + let width: usize = match spec.width { + NumericParam::Literal(w) => w, + _ => { + return Err(PrintfError::Unknown); // should not happen at this point!! + } + } + .try_into() + .unwrap_or_default(); + let formatted = if spec.left_adj { + let mut full_num = prefix + &*number; + while full_num.len() < width { + full_num.push(' '); + } + full_num + } else if spec.zero_pad && self.is_finite() { + while prefix.len() + number.len() < width { + prefix.push('0'); + } + prefix + &*number + } else { + let mut full_num = prefix + &*number; + while full_num.len() < width { + full_num.insert(0, ' '); + } + full_num + }; + Ok(formatted) + } + fn as_int(&self) -> Option { + None + } +} + +impl Printf for f32 { + fn format(&self, spec: &ConversionSpecifier) -> Result { + (*self as f64).format(spec) + } +} + +impl Printf for &wstr { + fn format(&self, spec: &ConversionSpecifier) -> Result { + if spec.conversion_type == ConversionType::String { + Ok((*self).to_owned()) + } else { + Err(PrintfError::WrongType) + } + } +} + +impl Printf for &str { + fn format(&self, spec: &ConversionSpecifier) -> Result { + if spec.conversion_type == ConversionType::String { + Ok((*self).into()) + } else { + Err(PrintfError::WrongType) + } + } +} + +impl Printf for char { + fn format(&self, spec: &ConversionSpecifier) -> Result { + if spec.conversion_type == ConversionType::Char { + let mut s = WString::new(); + s.push(*self); + Ok(s) + } else { + Err(PrintfError::WrongType) + } + } +} + +impl Printf for String { + fn format(&self, spec: &ConversionSpecifier) -> Result { + 
self.as_str().format(spec) + } +} + +impl Printf for WString { + fn format(&self, spec: &ConversionSpecifier) -> Result { + self.as_utfstr().format(spec) + } +} + +impl Printf for &WString { + fn format(&self, spec: &ConversionSpecifier) -> Result { + self.as_utfstr().format(spec) + } +} diff --git a/fish-rust/src/wutil/format/mod.rs b/fish-rust/src/wutil/format/mod.rs new file mode 100644 index 000000000..67fbedb38 --- /dev/null +++ b/fish-rust/src/wutil/format/mod.rs @@ -0,0 +1,7 @@ +#[allow(clippy::module_inception)] +mod format; +mod parser; +pub mod printf; + +#[cfg(test)] +mod tests; diff --git a/fish-rust/src/wutil/format/parser.rs b/fish-rust/src/wutil/format/parser.rs new file mode 100644 index 000000000..074e80601 --- /dev/null +++ b/fish-rust/src/wutil/format/parser.rs @@ -0,0 +1,218 @@ +// Adapted from https://github.com/tjol/sprintf-rs +// License follows: +// +// Copyright (c) 2021 Thomas Jollans +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is furnished +// to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS +// OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +use super::printf::{PrintfError, Result}; +use crate::wchar::{wstr, WExt, WString}; + +#[derive(Debug, Clone)] +pub enum FormatElement { + Verbatim(WString), + Format(ConversionSpecifier), +} + +/// Parsed printf conversion specifier +#[derive(Debug, Clone, Copy)] +pub struct ConversionSpecifier { + /// flag `#`: use `0x`, etc? + pub alt_form: bool, + /// flag `0`: left-pad with zeros? + pub zero_pad: bool, + /// flag `-`: left-adjust (pad with spaces on the right) + pub left_adj: bool, + /// flag `' '` (space): indicate sign with a space? + pub space_sign: bool, + /// flag `+`: Always show sign? (for signed numbers) + pub force_sign: bool, + /// field width + pub width: NumericParam, + /// floating point field precision + pub precision: NumericParam, + /// data type + pub conversion_type: ConversionType, +} + +/// Width / precision parameter +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum NumericParam { + /// The literal width + Literal(i32), + /// Get the width from the previous argument + /// + /// This should never be passed to [Printf::format()][super::format::Printf::format()]. 
+ FromArgument, +} + +/// Printf data type +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ConversionType { + /// `d`, `i`, or `u` + DecInt, + /// `o` + OctInt, + /// `x` or `p` + HexIntLower, + /// `X` + HexIntUpper, + /// `e` + SciFloatLower, + /// `E` + SciFloatUpper, + /// `f` + DecFloatLower, + /// `F` + DecFloatUpper, + /// `g` + CompactFloatLower, + /// `G` + CompactFloatUpper, + /// `c` + Char, + /// `s` + String, + /// `%` + PercentSign, +} + +pub(crate) fn parse_format_string(fmt: &wstr) -> Result> { + // find the first % + let mut res = Vec::new(); + let parts: Vec<&wstr> = match fmt.find_char('%') { + Some(i) => vec![&fmt[..i], &fmt[(i + 1)..]], + None => vec![fmt], + }; + if !parts[0].is_empty() { + res.push(FormatElement::Verbatim(parts[0].to_owned())); + } + if parts.len() > 1 { + let (spec, rest) = take_conversion_specifier(parts[1])?; + res.push(FormatElement::Format(spec)); + res.append(&mut parse_format_string(rest)?); + } + + Ok(res) +} + +fn take_conversion_specifier(s: &wstr) -> Result<(ConversionSpecifier, &wstr)> { + let mut spec = ConversionSpecifier { + alt_form: false, + zero_pad: false, + left_adj: false, + space_sign: false, + force_sign: false, + width: NumericParam::Literal(0), + precision: NumericParam::Literal(6), + // ignore length modifier + conversion_type: ConversionType::DecInt, + }; + + let mut s = s; + + // parse flags + loop { + match s.chars().next() { + Some('#') => { + spec.alt_form = true; + } + Some('0') => { + spec.zero_pad = true; + } + Some('-') => { + spec.left_adj = true; + } + Some(' ') => { + spec.space_sign = true; + } + Some('+') => { + spec.force_sign = true; + } + _ => { + break; + } + } + s = &s[1..]; + } + // parse width + let (w, mut s) = take_numeric_param(s); + spec.width = w; + // parse precision + if matches!(s.chars().next(), Some('.')) { + s = &s[1..]; + let (p, s2) = take_numeric_param(s); + spec.precision = p; + s = s2; + } + // check length specifier + for len_spec in ["hh", "h", "l", 
"ll", "q", "L", "j", "z", "Z", "t"] { + if s.starts_with(len_spec) { + s = &s[len_spec.len()..]; + break; // only allow one length specifier + } + } + // parse conversion type + spec.conversion_type = match s.chars().next() { + Some('i') | Some('d') | Some('u') => ConversionType::DecInt, + Some('o') => ConversionType::OctInt, + Some('x') => ConversionType::HexIntLower, + Some('X') => ConversionType::HexIntUpper, + Some('e') => ConversionType::SciFloatLower, + Some('E') => ConversionType::SciFloatUpper, + Some('f') => ConversionType::DecFloatLower, + Some('F') => ConversionType::DecFloatUpper, + Some('g') => ConversionType::CompactFloatLower, + Some('G') => ConversionType::CompactFloatUpper, + Some('c') | Some('C') => ConversionType::Char, + Some('s') | Some('S') => ConversionType::String, + Some('p') => { + spec.alt_form = true; + ConversionType::HexIntLower + } + Some('%') => ConversionType::PercentSign, + _ => { + return Err(PrintfError::ParseError); + } + }; + + Ok((spec, &s[1..])) +} + +fn take_numeric_param(s: &wstr) -> (NumericParam, &wstr) { + match s.chars().next() { + Some('*') => (NumericParam::FromArgument, &s[1..]), + Some(digit) if ('1'..='9').contains(&digit) => { + let mut s = s; + let mut w = 0; + loop { + match s.chars().next() { + Some(digit) if ('0'..='9').contains(&digit) => { + w = 10 * w + (digit as i32 - '0' as i32); + } + _ => { + break; + } + } + s = &s[1..]; + } + (NumericParam::Literal(w), s) + } + _ => (NumericParam::Literal(0), s), + } +} diff --git a/fish-rust/src/wutil/format/printf.rs b/fish-rust/src/wutil/format/printf.rs new file mode 100644 index 000000000..1153a2b40 --- /dev/null +++ b/fish-rust/src/wutil/format/printf.rs @@ -0,0 +1,126 @@ +// Adapted from https://github.com/tjol/sprintf-rs +// License follows: +// +// Copyright (c) 2021 Thomas Jollans +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the 
Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is furnished +// to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +// OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +pub use super::format::Printf; +use super::parser::{parse_format_string, ConversionType, FormatElement, NumericParam}; +use crate::wchar::{wstr, WString}; + +/// Error type +#[derive(Debug, Clone, Copy)] +pub enum PrintfError { + /// Error parsing the format string + ParseError, + /// Incorrect type passed as an argument + WrongType, + /// Too many arguments passed + TooManyArgs, + /// Too few arguments passed + NotEnoughArgs, + /// Other error (should never happen) + Unknown, +} + +pub type Result = std::result::Result; + +/// Format a string. (Roughly equivalent to `vsnprintf` or `vasprintf` in C) +/// +/// Takes a printf-style format string `format` and a slice of dynamically +/// typed arguments, `args`. 
+/// +/// use sprintf::{vsprintf, Printf}; +/// let n = 16; +/// let args: Vec<&dyn Printf> = vec![&n]; +/// let s = vsprintf("%#06x", &args).unwrap(); +/// assert_eq!(s, "0x0010"); +/// +/// See also: [sprintf] +pub fn vsprintf(format: &wstr, args: &[&dyn Printf]) -> Result { + vsprintfp(&parse_format_string(format)?, args) +} + +fn vsprintfp(format: &[FormatElement], args: &[&dyn Printf]) -> Result { + let mut res = WString::new(); + + let mut args = args; + let mut pop_arg = || { + if args.is_empty() { + Err(PrintfError::NotEnoughArgs) + } else { + let a = args[0]; + args = &args[1..]; + Ok(a) + } + }; + + for elem in format { + match elem { + FormatElement::Verbatim(s) => { + res.push_utfstr(s); + } + FormatElement::Format(spec) => { + if spec.conversion_type == ConversionType::PercentSign { + res.push('%'); + } else { + let mut completed_spec = *spec; + if spec.width == NumericParam::FromArgument { + completed_spec.width = NumericParam::Literal( + pop_arg()?.as_int().ok_or(PrintfError::WrongType)?, + ) + } + if spec.precision == NumericParam::FromArgument { + completed_spec.precision = NumericParam::Literal( + pop_arg()?.as_int().ok_or(PrintfError::WrongType)?, + ) + } + res.push_utfstr(&pop_arg()?.format(&completed_spec)?); + } + } + } + } + + if args.is_empty() { + Ok(res) + } else { + Err(PrintfError::TooManyArgs) + } +} + +/// Format a string. (Roughly equivalent to `snprintf` or `asprintf` in C) +/// +/// Takes a printf-style format string `format` and a variable number of +/// additional arguments. +/// +/// use sprintf::sprintf; +/// let s = sprintf!("%s = %*d", "forty-two", 4, 42); +/// assert_eq!(s, "forty-two = 42"); +/// +/// Wrapper around [vsprintf]. +macro_rules! sprintf { + ( + $fmt:expr, // format string + $($arg:expr),* // arguments + $(,)? 
// optional trailing comma + ) => { + crate::wutil::format::printf::vsprintf($fmt, &[$( &($arg) as &dyn crate::wutil::format::printf::Printf),* ][..]).expect("Invalid format string and/or arguments") + }; +} +pub(crate) use sprintf; diff --git a/fish-rust/src/wutil/format/tests.rs b/fish-rust/src/wutil/format/tests.rs new file mode 100644 index 000000000..309a7e507 --- /dev/null +++ b/fish-rust/src/wutil/format/tests.rs @@ -0,0 +1,117 @@ +// Adapted from https://github.com/tjol/sprintf-rs +// License follows: +// +// Copyright (c) 2021 Thomas Jollans +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is furnished +// to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS +// OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF +// OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +use super::printf::{sprintf, Printf}; +use crate::wchar::{widestrs, WString, L}; + +fn check_fmt(nfmt: &str, arg: T, expected: &str) { + let fmt: WString = nfmt.into(); + let our_result = sprintf!(&fmt, arg); + assert_eq!(our_result, expected); +} + +#[test] +fn test_int() { + check_fmt("%d", 12, "12"); + check_fmt("~%d~", 148, "~148~"); + check_fmt("00%dxx", -91232, "00-91232xx"); + check_fmt("%x", -9232, "ffffdbf0"); + check_fmt("%X", 432, "1B0"); + check_fmt("%09X", 432, "0000001B0"); + check_fmt("%9X", 432, " 1B0"); + check_fmt("%+9X", 492, " 1EC"); + check_fmt("% #9x", 4589, " 0x11ed"); + check_fmt("%2o", 4, " 4"); + check_fmt("% 12d", -4, " -4"); + check_fmt("% 12d", 48, " 48"); + check_fmt("%ld", -4_i64, "-4"); + check_fmt("%lX", -4_i64, "FFFFFFFFFFFFFFFC"); + check_fmt("%ld", 48_i64, "48"); + check_fmt("%-8hd", -12_i16, "-12 "); +} + +#[test] +fn test_float() { + check_fmt("%f", -46.38, "-46.380000"); + check_fmt("%012.3f", 1.2, "00000001.200"); + check_fmt("%012.3e", 1.7, "0001.700e+00"); + check_fmt("%e", 1e300, "1.000000e+300"); + check_fmt("%012.3g%%!", 2.6, "0000000002.6%!"); + check_fmt("%012.5G", -2.69, "-00000002.69"); + check_fmt("%+7.4f", 42.785, "+42.7850"); + check_fmt("{}% 7.4E", 493.12, "{} 4.9312E+02"); + check_fmt("% 7.4E", -120.3, "-1.2030E+02"); + check_fmt("%-10F", f64::INFINITY, "INF "); + check_fmt("%+010F", f64::INFINITY, " +INF"); + check_fmt("% f", f64::NAN, " nan"); + check_fmt("%+f", f64::NAN, "+nan"); + check_fmt("%.1f", 999.99, "1000.0"); + check_fmt("%.1f", 9.99, "10.0"); + check_fmt("%.1e", 9.99, "1.0e+01"); + check_fmt("%.2f", 9.99, "9.99"); + check_fmt("%.2e", 9.99, "9.99e+00"); + check_fmt("%.3f", 9.99, "9.990"); + check_fmt("%.3e", 9.99, "9.990e+00"); + check_fmt("%.1g", 9.99, "1e+01"); + check_fmt("%.1G", 9.99, "1E+01"); + check_fmt("%.1f", 2.99, "3.0"); + check_fmt("%.1e", 2.99, "3.0e+00"); + check_fmt("%.1g", 2.99, "3"); + check_fmt("%.1f", 2.599, "2.6"); + check_fmt("%.1e", 2.599, "2.6e+00"); + check_fmt("%.1g", 
2.599, "3"); +} + +#[test] +fn test_str() { + check_fmt( + "test %% with string: %s yay\n", + "FOO", + "test % with string: FOO yay\n", + ); + check_fmt("test char %c", '~', "test char ~"); +} + +#[test] +#[widestrs] +fn test_str_concat() { + assert_eq!(sprintf!("%s-%ls"L, "abc", "def"L), "abc-def"L); + assert_eq!(sprintf!("%s-%ls"L, "abc", "def"L), "abc-def"L); +} + +#[test] +#[should_panic] +fn test_bad_format() { + sprintf!(L!("%s"), 123); +} + +#[test] +#[should_panic] +fn test_missing_arg() { + sprintf!(L!("%s-%s"), "abc"); +} + +#[test] +#[should_panic] +fn test_too_many_args() { + sprintf!(L!("%d"), 1, 2, 3); +} diff --git a/fish-rust/src/wutil/gettext.rs b/fish-rust/src/wutil/gettext.rs new file mode 100644 index 000000000..1842d7eca --- /dev/null +++ b/fish-rust/src/wutil/gettext.rs @@ -0,0 +1,45 @@ +use crate::ffi; +use crate::wchar::{wchar_t, wstr}; +use crate::wchar_ffi::wcslen; + +/// Support for wgettext. + +/// Implementation detail for wgettext!. +pub fn wgettext_impl_do_not_use_directly(text: &[wchar_t]) -> &'static wstr { + assert_eq!(text.last(), Some(&0), "should be nul-terminated"); + let res: *const wchar_t = ffi::wgettext_ptr(text.as_ptr()); + let slice = unsafe { std::slice::from_raw_parts(res as *const u32, wcslen(res)) }; + wstr::from_slice(slice).expect("Invalid UTF-32") +} + +/// Get a (possibly translated) string from a string literal. +/// This returns a &'static wstr. +macro_rules! wgettext { + ($string:expr) => { + crate::wutil::gettext::wgettext_impl_do_not_use_directly( + crate::wchar_ffi::u32cstr!($string).as_slice_with_nul(), + ) + }; +} +pub(crate) use wgettext; + +/// Like wgettext, but applies a sprintf format string. +/// The result is a WString. +macro_rules! wgettext_fmt { + ( + $string:expr, // format string + $($args:expr),* // list of expressions + $(,)? 
// optional trailing comma + ) => { + crate::wutil::sprintf!(&crate::wutil::wgettext!($string), $($args),*) + }; +} +pub(crate) use wgettext_fmt; + +use crate::ffi_tests::add_test; +add_test!("test_untranslated", || { + let s: &'static wstr = wgettext!("abc"); + assert_eq!(s, "abc"); + let s2: &'static wstr = wgettext!("static"); + assert_eq!(s2, "static"); +}); diff --git a/fish-rust/src/wutil/mod.rs b/fish-rust/src/wutil/mod.rs new file mode 100644 index 000000000..ae29f5cca --- /dev/null +++ b/fish-rust/src/wutil/mod.rs @@ -0,0 +1,7 @@ +pub mod format; +pub mod gettext; +mod wcstoi; + +pub(crate) use format::printf::sprintf; +pub(crate) use gettext::{wgettext, wgettext_fmt}; +pub use wcstoi::*; diff --git a/fish-rust/src/wutil/wcstoi.rs b/fish-rust/src/wutil/wcstoi.rs new file mode 100644 index 000000000..df8f89ede --- /dev/null +++ b/fish-rust/src/wutil/wcstoi.rs @@ -0,0 +1,216 @@ +use num_traits::{NumCast, PrimInt}; +use std::iter::Peekable; + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum Error { + Overflow, + Empty, + InvalidDigit, +} + +struct ParseResult { + result: u64, + negative: bool, +} + +/// Helper to get the current char, or \0. +fn current(chars: &mut Peekable) -> char +where + Chars: Iterator, +{ + match chars.peek() { + Some(c) => *c, + None => '\0', + } +} + +/// Parse the given \p src as an integer. +/// If mradix is not None, it is used as the radix; otherwise the radix is inferred: +/// - Leading 0x or 0X means 16. +/// - Leading 0 means 8. +/// - Otherwise 10. +/// The parse result contains the number as a u64, and whether it was negative. +fn fish_parse_radix(ichars: Chars, mradix: Option) -> Result +where + Chars: Iterator, +{ + if let Some(r) = mradix { + assert!((2..=36).contains(&r), "fish_parse_radix: invalid radix {r}"); + } + let chars = &mut ichars.peekable(); + + // Skip leading whitespace. 
+ while current(chars).is_whitespace() { + chars.next(); + } + + if chars.peek().is_none() { + return Err(Error::Empty); + } + + // Consume leading +/-. + let mut negative; + match current(chars) { + '-' | '+' => { + negative = current(chars) == '-'; + chars.next(); + } + _ => negative = false, + } + + // Determine the radix. + let radix = if let Some(radix) = mradix { + radix + } else if current(chars) == '0' { + chars.next(); + match current(chars) { + 'x' | 'X' => { + chars.next(); + 16 + } + c if ('0'..='9').contains(&c) => 8, + _ => { + // Just a 0. + return Ok(ParseResult { + result: 0, + negative: false, + }); + } + } + } else { + 10 + }; + + // Compute as u64. + let mut consumed1 = false; + let mut result: u64 = 0; + while let Some(digit) = current(chars).to_digit(radix) { + result = result + .checked_mul(radix as u64) + .and_then(|r| r.checked_add(digit as u64)) + .ok_or(Error::Overflow)?; + chars.next(); + consumed1 = true; + } + + // Did we consume at least one char? + if !consumed1 { + return Err(Error::InvalidDigit); + } + + // Do not return -0. + if result == 0 { + negative = false; + } + Ok(ParseResult { result, negative }) +} + +/// Parse some iterator over Chars into some Integer type, optionally with a radix. +fn fish_wcstoi_impl(src: Chars, mradix: Option) -> Result +where + Chars: Iterator, + Int: PrimInt, +{ + let bits = Int::zero().count_zeros(); + assert!(bits <= 64, "fish_wcstoi: Int must be <= 64 bits"); + let signed = Int::min_value() < Int::zero(); + + let ParseResult { + result, negative, .. + } = fish_parse_radix(src, mradix)?; + + if !signed && negative { + Err(Error::InvalidDigit) + } else if !signed || !negative { + match Int::from(result) { + Some(r) => Ok(r), + None => Err(Error::Overflow), + } + } else { + assert!(signed && negative); + // Signed type, so convert to s64. + // Careful of the most negative value. 
+ if bits == 64 && result == 1 << 63 { + return Ok(Int::min_value()); + } + ::from(result) + .and_then(|r| r.checked_neg()) + .and_then(|r| Int::from(r)) + .ok_or(Error::Overflow) + } +} + +/// Convert the given wide string to an integer. +/// The semantics here match wcstol(): +/// - Leading whitespace is skipped. +/// - 0 means octal, 0x means hex +/// - Leading + is supported. +pub fn fish_wcstoi(src: Chars) -> Result +where + Chars: Iterator, + Int: PrimInt, +{ + fish_wcstoi_impl(src, None) +} + +/// Convert the given wide string to an integer using the given radix. +/// Leading whitespace is skipped. +pub fn fish_wcstoi_radix(src: Chars, radix: u32) -> Result +where + Chars: Iterator, + Int: PrimInt, +{ + fish_wcstoi_impl(src, Some(radix)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn test_min_max(min: Int, max: Int) { + assert_eq!(fish_wcstoi(min.to_string().chars()), Ok(min)); + assert_eq!(fish_wcstoi(max.to_string().chars()), Ok(max)); + } + + #[test] + fn tests() { + let run1 = |s: &str| -> Result { fish_wcstoi(s.chars()) }; + let run1_rad = + |s: &str, radix: u32| -> Result { fish_wcstoi_radix(s.chars(), radix) }; + assert_eq!(run1(""), Err(Error::Empty)); + assert_eq!(run1(" \n "), Err(Error::Empty)); + assert_eq!(run1("0"), Ok(0)); + assert_eq!(run1("-0"), Ok(0)); + assert_eq!(run1("+0"), Ok(0)); + assert_eq!(run1("+-0"), Err(Error::InvalidDigit)); + assert_eq!(run1("-+0"), Err(Error::InvalidDigit)); + assert_eq!(run1("123"), Ok(123)); + assert_eq!(run1("+123"), Ok(123)); + assert_eq!(run1("-123"), Ok(-123)); + assert_eq!(run1("123"), Ok(123)); + assert_eq!(run1("+0x123"), Ok(291)); + assert_eq!(run1("-0x123"), Ok(-291)); + assert_eq!(run1("+0X123"), Ok(291)); + assert_eq!(run1("-0X123"), Ok(-291)); + assert_eq!(run1("+0123"), Ok(83)); + assert_eq!(run1("-0123"), Ok(-83)); + assert_eq!(run1(" 345 "), Ok(345)); + assert_eq!(run1(" -345 "), Ok(-345)); + assert_eq!(run1(" x345"), Err(Error::InvalidDigit)); + assert_eq!(run1("456x"), Ok(456)); + 
assert_eq!(run1("456 x"), Ok(456)); + assert_eq!(run1("99999999999999999999999"), Err(Error::Overflow)); + assert_eq!(run1("-99999999999999999999999"), Err(Error::Overflow)); + // This is subtle. "567" in base 8 is "375" in base 10. The final "8" is not converted. + assert_eq!(run1_rad("5678", 8), Ok(375)); + + test_min_max(std::i8::MIN, std::i8::MAX); + test_min_max(std::i16::MIN, std::i16::MAX); + test_min_max(std::i32::MIN, std::i32::MAX); + test_min_max(std::i64::MIN, std::i64::MAX); + test_min_max(std::u8::MIN, std::u8::MAX); + test_min_max(std::u16::MIN, std::u16::MAX); + test_min_max(std::u32::MIN, std::u32::MAX); + test_min_max(std::u64::MIN, std::u64::MAX); + } +} diff --git a/fish-rust/widestring-suffix/Cargo.lock b/fish-rust/widestring-suffix/Cargo.lock new file mode 100644 index 000000000..f5e974052 --- /dev/null +++ b/fish-rust/widestring-suffix/Cargo.lock @@ -0,0 +1,47 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "proc-macro2" +version = "1.0.49" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "1.0.107" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" + +[[package]] +name = "widestring-suffix" +version 
= "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/fish-rust/widestring-suffix/Cargo.toml b/fish-rust/widestring-suffix/Cargo.toml new file mode 100644 index 000000000..d756a5b17 --- /dev/null +++ b/fish-rust/widestring-suffix/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "widestring-suffix" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +syn = { version = "1.0", features = ["full", "visit-mut"] } +proc-macro2 = "1.0" +quote = "1.0" diff --git a/fish-rust/widestring-suffix/src/lib.rs b/fish-rust/widestring-suffix/src/lib.rs new file mode 100644 index 000000000..4162e7274 --- /dev/null +++ b/fish-rust/widestring-suffix/src/lib.rs @@ -0,0 +1,51 @@ +extern crate proc_macro as pm; + +use proc_macro2::{Group, Literal, TokenStream, TokenTree}; +use quote::quote_spanned; +use syn::{Lit, LitStr}; + +/// A proc macro which allows easy creation of nul-terminated wide strings. +/// It replaces strings with an L suffix like so: +/// "foo"L +/// with a call like so: +/// crate::wchar::L!("foo") +#[proc_macro_attribute] +pub fn widestrs(_attr: pm::TokenStream, input: pm::TokenStream) -> pm::TokenStream { + let s = widen_stream(input.into()); + s.into() +} + +fn widen_token_tree(tt: TokenTree) -> TokenStream { + match tt { + TokenTree::Group(group) => { + let wide_stream = widen_stream(group.stream()); + TokenTree::Group(Group::new(group.delimiter(), wide_stream)).into() + } + TokenTree::Literal(lit) => widen_literal(lit), + tt => tt.into(), + } +} + +fn widen_stream(input: TokenStream) -> TokenStream { + input.into_iter().map(widen_token_tree).collect() +} + +fn try_parse_literal(tt: TokenTree) -> Option { + let ts: TokenStream = tt.into(); + match syn::parse2::(ts) { + Ok(Lit::Str(lit)) => Some(lit), + _ => None, + } +} + +fn widen_literal(lit: Literal) -> TokenStream { + let tt = TokenTree::Literal(lit); + match try_parse_literal(tt.clone()) { + Some(lit) if lit.suffix() == "L" => { + let value = lit.value(); + 
let span = lit.span(); + quote_spanned!(span=> crate::wchar::L!(#value)).into() + } + _ => tt.into(), + } +} diff --git a/fish-rust/widestring-suffix/tests/test.rs b/fish-rust/widestring-suffix/tests/test.rs new file mode 100644 index 000000000..eb11e1b72 --- /dev/null +++ b/fish-rust/widestring-suffix/tests/test.rs @@ -0,0 +1,24 @@ +use widestring_suffix::widestrs; + +mod wchar { + macro_rules! L { + ($string:expr) => { + 42 + }; + } + + pub(crate) use L; +} + +#[widestrs] +mod stuff { + pub fn test1() { + let s = "abc"L; + assert_eq!(s, 42); + } +} + +#[test] +fn test_widestring() { + stuff::test1(); +} diff --git a/fish.spec.in b/fish.spec.in index 5ee8ae86e..bf2d73776 100644 --- a/fish.spec.in +++ b/fish.spec.in @@ -10,6 +10,7 @@ URL: https://fishshell.com/ Source0: %{name}_@VERSION@.orig.tar.xz BuildRequires: ncurses-devel gettext gcc-c++ xz pcre2-devel +BuildRequires: rust >= 1.67 %if 0%{?rhel} && 0%{?rhel} < 8 BuildRequires: cmake3 diff --git a/src/ast.cpp b/src/ast.cpp index 554ee48d6..bd5d0b23b 100644 --- a/src/ast.cpp +++ b/src/ast.cpp @@ -31,7 +31,7 @@ static tok_flags_t tokenizer_flags_from_parse_flags(parse_tree_flags_t flags) { // Given an expanded string, returns any keyword it matches. static parse_keyword_t keyword_with_name(const wcstring &name) { - return str_to_enum(name.c_str(), keyword_enum_map, keyword_enum_map_len); + return keyword_from_string(name.c_str()); } static bool is_keyword_char(wchar_t c) { @@ -77,8 +77,7 @@ static parse_keyword_t keyword_for_token(token_type_t tok, const wcstring &token } /// Convert from tokenizer_t's token type to a parse_token_t type. 
-static parse_token_type_t parse_token_type_from_tokenizer_token( - enum token_type_t tokenizer_token_type) { +static parse_token_type_t parse_token_type_from_tokenizer_token(token_type_t tokenizer_token_type) { switch (tokenizer_token_type) { case token_type_t::string: return parse_token_type_t::string; @@ -111,7 +110,7 @@ class token_stream_t { explicit token_stream_t(const wcstring &src, parse_tree_flags_t flags, std::vector &comments) : src_(src), - tok_(src_.c_str(), tokenizer_flags_from_parse_flags(flags)), + tok_(new_tokenizer(src_.c_str(), tokenizer_flags_from_parse_flags(flags))), comment_ranges(comments) {} /// \return the token at the given index, without popping it. If the token stream is exhausted, @@ -161,8 +160,8 @@ class token_stream_t { /// \return a new parse token, advancing the tokenizer. /// This returns comments. parse_token_t advance_1() { - auto mtoken = tok_.next(); - if (!mtoken.has_value()) { + auto mtoken = tok_->next(); + if (!mtoken) { return parse_token_t{parse_token_type_t::terminate}; } const tok_t &token = *mtoken; @@ -171,13 +170,13 @@ class token_stream_t { // `builtin --names` lists builtins, but `builtin "--names"` attempts to run --names as a // command. Amazingly as of this writing (10/12/13) nobody seems to have noticed this. // Squint at it really hard and it even starts to look like a feature. 
- parse_token_t result{parse_token_type_from_tokenizer_token(token.type)}; - const wcstring &text = tok_.copy_text_of(token, &storage_); - result.keyword = keyword_for_token(token.type, text); + parse_token_t result{parse_token_type_from_tokenizer_token(token.type_)}; + const wcstring &text = storage_ = *tok_->text_of(token); + result.keyword = keyword_for_token(token.type_, text); result.has_dash_prefix = !text.empty() && text.at(0) == L'-'; result.is_help_argument = (text == L"-h" || text == L"--help"); result.is_newline = (result.type == parse_token_type_t::end && text == L"\n"); - result.may_be_variable_assignment = variable_assignment_equals_pos(text).has_value(); + result.may_be_variable_assignment = variable_assignment_equals_pos(text) != nullptr; result.tok_error = token.error; // These assertions are totally bogus. Basically our tokenizer works in size_t but we work @@ -222,7 +221,7 @@ class token_stream_t { const wcstring &src_; // The tokenizer to generate new tokens. - tokenizer_t tok_; + rust::Box tok_; /// Any comment nodes are collected here. /// These are only collected if parse_flag_include_comments is set. @@ -396,13 +395,15 @@ static wcstring token_types_user_presentable_description( std::initializer_list types) { assert(types.size() > 0 && "Should not be empty list"); if (types.size() == 1) { - return token_type_user_presentable_description(*types.begin()); + return *token_type_user_presentable_description(*types.begin(), parse_keyword_t::none); } size_t idx = 0; wcstring res; for (parse_token_type_t type : types) { const wchar_t *optor = (idx++ ? 
L" or " : L""); - append_format(res, L"%ls%ls", optor, token_type_user_presentable_description(type).c_str()); + append_format( + res, L"%ls%ls", optor, + token_type_user_presentable_description(type, parse_keyword_t::none)->c_str()); } return res; } @@ -635,7 +636,7 @@ struct populator_t { if (out_errors_) { parse_error_t err; - err.text = vformat_string(fmt, va); + err.text = std::make_unique(vformat_string(fmt, va)); err.code = code; err.source_start = range.start; err.source_length = range.length; @@ -682,9 +683,10 @@ struct populator_t { "Should not attempt to consume terminate token"); auto tok = consume_any_token(); if (tok.type != type) { - parse_error(tok, parse_error_generic, _(L"Expected %ls, but found %ls"), - token_type_user_presentable_description(type).c_str(), - tok.user_presentable_description().c_str()); + parse_error( + tok, parse_error_code_t::generic, _(L"Expected %ls, but found %ls"), + token_type_user_presentable_description(type, parse_keyword_t::none)->c_str(), + tok.user_presentable_description().c_str()); return source_range_t{0, 0}; } return tok.range(); @@ -702,10 +704,11 @@ struct populator_t { // TODO: this is a crummy message if we get a tokenizer error, for example: // complete -c foo -a "'abc" if (this->top_type_ == type_t::freestanding_argument_list) { - this->parse_error( - tok, parse_error_generic, _(L"Expected %ls, but found %ls"), - token_type_user_presentable_description(parse_token_type_t::string).c_str(), - tok.user_presentable_description().c_str()); + this->parse_error(tok, parse_error_code_t::generic, _(L"Expected %ls, but found %ls"), + token_type_user_presentable_description(parse_token_type_t::string, + parse_keyword_t::none) + ->c_str(), + tok.user_presentable_description().c_str()); return; } @@ -715,15 +718,15 @@ struct populator_t { // There are three keywords which end a job list. 
switch (tok.keyword) { case parse_keyword_t::kw_end: - this->parse_error(tok, parse_error_unbalancing_end, + this->parse_error(tok, parse_error_code_t::unbalancing_end, _(L"'end' outside of a block")); break; case parse_keyword_t::kw_else: - this->parse_error(tok, parse_error_unbalancing_else, + this->parse_error(tok, parse_error_code_t::unbalancing_else, _(L"'else' builtin not inside of if block")); break; case parse_keyword_t::kw_case: - this->parse_error(tok, parse_error_unbalancing_case, + this->parse_error(tok, parse_error_code_t::unbalancing_case, _(L"'case' builtin not inside of switch block")); break; default: @@ -738,13 +741,14 @@ struct populator_t { case parse_token_type_t::background: case parse_token_type_t::andand: case parse_token_type_t::oror: - parse_error(tok, parse_error_generic, _(L"Expected a string, but found %ls"), + parse_error(tok, parse_error_code_t::generic, + _(L"Expected a string, but found %ls"), tok.user_presentable_description().c_str()); break; case parse_token_type_t::tokenizer_error: parse_error(tok, parse_error_from_tokenizer_error(tok.tok_error), L"%ls", - tokenizer_get_error_message(tok.tok_error)); + tokenizer_get_error_message(tok.tok_error)->c_str()); break; case parse_token_type_t::end: @@ -968,14 +972,15 @@ struct populator_t { } else if (token1.type != parse_token_type_t::string) { // We may be unwinding already; do not produce another error. // For example in `true | and`. - parse_error(token1, parse_error_generic, _(L"Expected a command, but found %ls"), + parse_error(token1, parse_error_code_t::generic, + _(L"Expected a command, but found %ls"), token1.user_presentable_description().c_str()); return got_error(); } else if (token1.may_be_variable_assignment) { // Here we have a variable assignment which we chose to not parse as a variable // assignment because there was no string after it. // Ensure we consume the token, so we don't get back here again at the same place. 
- parse_error(consume_any_token(), parse_error_bare_variable_assignment, L""); + parse_error(consume_any_token(), parse_error_code_t::bare_variable_assignment, L""); return got_error(); } @@ -1025,7 +1030,8 @@ struct populator_t { // For example, `if end` or `while end` will produce this error. // We still have to descend into the decorated statement because // we can't leave our pointer as null. - parse_error(token1, parse_error_generic, _(L"Expected a command, but found %ls"), + parse_error(token1, parse_error_code_t::generic, + _(L"Expected a command, but found %ls"), token1.user_presentable_description().c_str()); return got_error(); @@ -1083,7 +1089,8 @@ struct populator_t { const auto &tok = peek_token(1); if (tok.keyword == parse_keyword_t::kw_and || tok.keyword == parse_keyword_t::kw_or) { const wchar_t *cmdname = (tok.keyword == parse_keyword_t::kw_and ? L"and" : L"or"); - parse_error(tok, parse_error_andor_in_pipeline, INVALID_PIPELINE_CMD_ERR_MSG, cmdname); + parse_error(tok, parse_error_code_t::andor_in_pipeline, INVALID_PIPELINE_CMD_ERR_MSG, + cmdname); } node.accept(*this); } @@ -1112,7 +1119,7 @@ struct populator_t { return; } - parse_error(peek, parse_error_generic, L"Expected %ls, but found %ls", + parse_error(peek, parse_error_code_t::generic, L"Expected %ls, but found %ls", token_types_user_presentable_description({TokTypes...}).c_str(), peek.user_presentable_description().c_str()); token.unsourced = true; @@ -1149,11 +1156,11 @@ struct populator_t { source_range_t kw_range = p.first; const wchar_t *kw_name = p.second; if (kw_name) { - this->parse_error(kw_range, parse_error_generic, + this->parse_error(kw_range, parse_error_code_t::generic, L"Missing end to balance this %ls", kw_name); } } - parse_error(peek, parse_error_generic, L"Expected %ls, but found %ls", + parse_error(peek, parse_error_code_t::generic, L"Expected %ls, but found %ls", keywords_user_presentable_description({KWs...}).c_str(), peek.user_presentable_description().c_str()); 
return; @@ -1372,7 +1379,7 @@ wcstring ast_t::dump(const wcstring &orig) const { desc = L""; break; default: - desc = token_type_user_presentable_description(n->type); + desc = *token_type_user_presentable_description(n->type, parse_keyword_t::none); break; } append_format(result, L"%ls", desc.c_str()); diff --git a/src/ast.h b/src/ast.h index ed7081e72..86ea1b853 100644 --- a/src/ast.h +++ b/src/ast.h @@ -497,7 +497,7 @@ struct statement_t final : public branch_t { // A job is a non-empty list of statements, separated by pipes. (Non-empty is useful for cases // like if statements, where we require a command). -struct job_t final : public branch_t { +struct job_pipeline_t final : public branch_t { // Maybe the time keyword. optional_t> time; @@ -523,7 +523,7 @@ struct job_conjunction_t final : public branch_t { optional_t decorator{}; // The job itself. - job_t job; + job_pipeline_t job; // The rest of the job conjunction, with && or ||s. job_conjunction_continuation_list_t continuations; @@ -727,7 +727,7 @@ struct job_conjunction_continuation_t final maybe_newlines_t newlines; // The job itself. - job_t job; + job_pipeline_t job; FIELDS(conjunction, newlines, job) }; diff --git a/src/ast_node_types.inc b/src/ast_node_types.inc index b0ac3ea98..1a18675e2 100644 --- a/src/ast_node_types.inc +++ b/src/ast_node_types.inc @@ -19,7 +19,7 @@ ELEMLIST(argument_or_redirection_list, argument_or_redirection) ELEM(variable_assignment) ELEMLIST(variable_assignment_list, variable_assignment) -ELEM(job) +ELEM(job_pipeline) ELEM(job_conjunction) // For historical reasons, a job list is a list of job *conjunctions*. This should be fixed. 
ELEMLIST(job_list, job_conjunction) diff --git a/src/builtin.cpp b/src/builtin.cpp index 085b278da..5468b5f9f 100644 --- a/src/builtin.cpp +++ b/src/builtin.cpp @@ -41,10 +41,7 @@ #include "builtins/complete.h" #include "builtins/contains.h" #include "builtins/disown.h" -#include "builtins/echo.h" -#include "builtins/emit.h" #include "builtins/eval.h" -#include "builtins/exit.h" #include "builtins/fg.h" #include "builtins/functions.h" #include "builtins/history.h" @@ -56,17 +53,18 @@ #include "builtins/random.h" #include "builtins/read.h" #include "builtins/realpath.h" -#include "builtins/return.h" #include "builtins/set.h" #include "builtins/set_color.h" +#include "builtins/shared.rs.h" #include "builtins/source.h" #include "builtins/status.h" #include "builtins/string.h" #include "builtins/test.h" #include "builtins/type.h" #include "builtins/ulimit.h" -#include "builtins/wait.h" #include "complete.h" +#include "cxx.h" +#include "cxxgen.h" #include "fallback.h" // IWYU pragma: keep #include "flog.h" #include "io.h" @@ -79,6 +77,10 @@ #include "wgetopt.h" #include "wutil.h" // IWYU pragma: keep +static maybe_t try_get_rust_builtin(const wcstring &cmd); +static maybe_t builtin_run_rust(parser_t &parser, io_streams_t &streams, + const wcstring_list_t &argv, RustBuiltin builtin); + /// Counts the number of arguments in the specified null-terminated array int builtin_count_args(const wchar_t *const *argv) { int argc; @@ -223,6 +225,10 @@ static maybe_t builtin_generic(parser_t &parser, io_streams_t &streams, con return STATUS_CMD_ERROR; } +static maybe_t implemented_in_rust(parser_t &, io_streams_t &, const wchar_t **) { + DIE("builtin is implemented in Rust, this should not be called"); +} + // How many bytes we read() at once. // Since this is just for counting, it can be massive. 
#define COUNT_CHUNK_SIZE (512 * 256) @@ -374,13 +380,13 @@ static constexpr builtin_data_t builtin_datas[] = { {L"continue", &builtin_break_continue, N_(L"Skip over remaining innermost loop")}, {L"count", &builtin_count, N_(L"Count the number of arguments")}, {L"disown", &builtin_disown, N_(L"Remove job from job list")}, - {L"echo", &builtin_echo, N_(L"Print arguments")}, + {L"echo", &implemented_in_rust, N_(L"Print arguments")}, {L"else", &builtin_generic, N_(L"Evaluate block if condition is false")}, - {L"emit", &builtin_emit, N_(L"Emit an event")}, + {L"emit", &implemented_in_rust, N_(L"Emit an event")}, {L"end", &builtin_generic, N_(L"End a block of commands")}, {L"eval", &builtin_eval, N_(L"Evaluate a string as a statement")}, {L"exec", &builtin_generic, N_(L"Run command in current process")}, - {L"exit", &builtin_exit, N_(L"Exit the shell")}, + {L"exit", &implemented_in_rust, N_(L"Exit the shell")}, {L"false", &builtin_false, N_(L"Return an unsuccessful result")}, {L"fg", &builtin_fg, N_(L"Send job to foreground")}, {L"for", &builtin_generic, N_(L"Perform a set of commands multiple times")}, @@ -398,7 +404,7 @@ static constexpr builtin_data_t builtin_datas[] = { {L"random", &builtin_random, N_(L"Generate random number")}, {L"read", &builtin_read, N_(L"Read a line of input into variables")}, {L"realpath", &builtin_realpath, N_(L"Show absolute path sans symlinks")}, - {L"return", &builtin_return, N_(L"Stop the currently evaluated function")}, + {L"return", &implemented_in_rust, N_(L"Stop the currently evaluated function")}, {L"set", &builtin_set, N_(L"Handle environment variables")}, {L"set_color", &builtin_set_color, N_(L"Set the terminal color")}, {L"source", &builtin_source, N_(L"Evaluate contents of file")}, @@ -410,7 +416,7 @@ static constexpr builtin_data_t builtin_datas[] = { {L"true", &builtin_true, N_(L"Return a successful result")}, {L"type", &builtin_type, N_(L"Check if a thing is a thing")}, {L"ulimit", &builtin_ulimit, N_(L"Get/set resource usage 
limits")}, - {L"wait", &builtin_wait, N_(L"Wait for background processes completed")}, + {L"wait", &implemented_in_rust, N_(L"Wait for background processes completed")}, {L"while", &builtin_generic, N_(L"Perform a command multiple times")}, }; ASSERT_SORTED_BY_NAME(builtin_datas); @@ -450,38 +456,43 @@ proc_status_t builtin_run(parser_t &parser, const wcstring_list_t &argv, io_stre return proc_status_t::from_exit_code(STATUS_CMD_OK); } - if (const builtin_data_t *data = builtin_lookup(cmdname)) { + maybe_t builtin_ret; + + auto rust_builtin = try_get_rust_builtin(cmdname); + if (rust_builtin.has_value()) { + builtin_ret = builtin_run_rust(parser, streams, argv, *rust_builtin); + } else if (const builtin_data_t *data = builtin_lookup(cmdname)) { // Construct the permutable argv array which the builtin expects, and execute the builtin. null_terminated_array_t argv_arr(argv); - maybe_t builtin_ret = data->func(parser, streams, argv_arr.get()); - - // Flush our out and error streams, and check for their errors. - int out_ret = streams.out.flush_and_check_error(); - int err_ret = streams.err.flush_and_check_error(); - - // Resolve our status code. - // If the builtin itself produced an error, use that error. - // Otherwise use any errors from writing to out and writing to err, in that order. - int code = builtin_ret.has_value() ? *builtin_ret : 0; - if (code == 0) code = out_ret; - if (code == 0) code = err_ret; - - // The exit code is cast to an 8-bit unsigned integer, so saturate to 255. Otherwise, - // multiples of 256 are reported as 0. - if (code > 255) code = 255; - - // Handle the case of an empty status. 
- if (code == 0 && !builtin_ret.has_value()) { - return proc_status_t::empty(); - } - if (code < 0) { - FLOGF(warning, "builtin %ls returned invalid exit code %d", cmdname.c_str(), code); - } - return proc_status_t::from_exit_code(code); + builtin_ret = data->func(parser, streams, argv_arr.get()); + } else { + FLOGF(error, UNKNOWN_BUILTIN_ERR_MSG, cmdname.c_str()); + return proc_status_t::from_exit_code(STATUS_CMD_ERROR); } - FLOGF(error, UNKNOWN_BUILTIN_ERR_MSG, cmdname.c_str()); - return proc_status_t::from_exit_code(STATUS_CMD_ERROR); + // Flush our out and error streams, and check for their errors. + int out_ret = streams.out.flush_and_check_error(); + int err_ret = streams.err.flush_and_check_error(); + + // Resolve our status code. + // If the builtin itself produced an error, use that error. + // Otherwise use any errors from writing to out and writing to err, in that order. + int code = builtin_ret.has_value() ? *builtin_ret : 0; + if (code == 0) code = out_ret; + if (code == 0) code = err_ret; + + // The exit code is cast to an 8-bit unsigned integer, so saturate to 255. Otherwise, + // multiples of 256 are reported as 0. + if (code > 255) code = 255; + + // Handle the case of an empty status. + if (code == 0 && !builtin_ret.has_value()) { + return proc_status_t::empty(); + } + if (code < 0) { + FLOGF(warning, "builtin %ls returned invalid exit code %d", cmdname.c_str(), code); + } + return proc_status_t::from_exit_code(code); } /// Returns a list of all builtin names. 
@@ -512,3 +523,38 @@ const wchar_t *builtin_get_desc(const wcstring &name) { } return result; } + +static maybe_t try_get_rust_builtin(const wcstring &cmd) { + if (cmd == L"echo") { + return RustBuiltin::Echo; + } + if (cmd == L"emit") { + return RustBuiltin::Emit; + } + if (cmd == L"exit") { + return RustBuiltin::Exit; + } + if (cmd == L"wait") { + return RustBuiltin::Wait; + } + if (cmd == L"return") { + return RustBuiltin::Return; + } + return none(); +} + +static maybe_t builtin_run_rust(parser_t &parser, io_streams_t &streams, + const wcstring_list_t &argv, RustBuiltin builtin) { + ::rust::Vec rust_argv; + for (const wcstring &arg : argv) { + rust_argv.emplace_back(arg.c_str()); + } + + int status_code; + bool update_status = rust_run_builtin(parser, streams, rust_argv, builtin, status_code); + if (update_status) { + return status_code; + } else { + return none(); + } +} diff --git a/src/builtin.h b/src/builtin.h index 3e0685683..e1a61452b 100644 --- a/src/builtin.h +++ b/src/builtin.h @@ -106,4 +106,13 @@ struct help_only_cmd_opts_t { }; int parse_help_only_cmd_opts(help_only_cmd_opts_t &opts, int *optind, int argc, const wchar_t **argv, parser_t &parser, io_streams_t &streams); + +/// An enum of the builtins implemented in Rust. 
+enum RustBuiltin : int32_t { + Echo, + Emit, + Exit, + Wait, + Return, +}; #endif diff --git a/src/builtins/commandline.cpp b/src/builtins/commandline.cpp index 51bf17f26..5dc33a65d 100644 --- a/src/builtins/commandline.cpp +++ b/src/builtins/commandline.cpp @@ -103,12 +103,12 @@ static void write_part(const wchar_t *begin, const wchar_t *end, int cut_at_curs // std::fwprintf( stderr, L"Subshell: %ls, end char %lc\n", buff, *end ); wcstring out; wcstring buff(begin, end - begin); - tokenizer_t tok(buff.c_str(), TOK_ACCEPT_UNFINISHED); - while (auto token = tok.next()) { + auto tok = new_tokenizer(buff.c_str(), TOK_ACCEPT_UNFINISHED); + while (auto token = tok->next()) { if ((cut_at_cursor) && (token->offset + token->length >= pos)) break; - if (token->type == token_type_t::string) { - wcstring tmp = tok.text_of(*token); + if (token->type_ == token_type_t::string) { + wcstring tmp = *tok->text_of(*token); unescape_string_in_place(&tmp, UNESCAPE_INCOMPLETE); out.append(tmp); out.push_back(L'\n'); diff --git a/src/builtins/complete.cpp b/src/builtins/complete.cpp index d01e747c4..8b781a16d 100644 --- a/src/builtins/complete.cpp +++ b/src/builtins/complete.cpp @@ -337,12 +337,12 @@ maybe_t builtin_complete(parser_t &parser, io_streams_t &streams, const wch } for (const auto &condition_string : condition) { - parse_error_list_t errors; - if (parse_util_detect_errors(condition_string, &errors)) { - for (const auto &error : errors) { + auto errors = new_parse_error_list(); + if (parse_util_detect_errors(condition_string, &*errors)) { + for (size_t i = 0; i < errors->size(); i++) { wcstring prefix(wcstring(cmd) + L": -n '" + condition_string + L"': "); - streams.err.append(error.describe_with_prefix(condition_string, prefix, - parser.is_interactive(), false)); + streams.err.append(*errors->at(i)->describe_with_prefix( + condition_string, prefix, parser.is_interactive(), false)); streams.err.push_back(L'\n'); } return STATUS_CMD_ERROR; diff --git a/src/builtins/echo.cpp 
b/src/builtins/echo.cpp deleted file mode 100644 index 0f15e36b8..000000000 --- a/src/builtins/echo.cpp +++ /dev/null @@ -1,243 +0,0 @@ -// Implementation of the echo builtin. -#include "config.h" // IWYU pragma: keep - -#include "echo.h" - -#include - -#include "../builtin.h" -#include "../common.h" -#include "../fallback.h" // IWYU pragma: keep -#include "../io.h" -#include "../maybe.h" -#include "../wgetopt.h" -#include "../wutil.h" // IWYU pragma: keep - -struct echo_cmd_opts_t { - bool print_newline = true; - bool print_spaces = true; - bool interpret_special_chars = false; -}; -static const wchar_t *const short_options = L"+:Eens"; -static const struct woption *const long_options = nullptr; - -static int parse_cmd_opts(echo_cmd_opts_t &opts, int *optind, int argc, const wchar_t **argv, - parser_t &parser, io_streams_t &streams) { - UNUSED(parser); - UNUSED(streams); - const wchar_t *cmd = argv[0]; - int opt; - wgetopter_t w; - echo_cmd_opts_t oldopts = opts; - int oldoptind = 0; - while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) { - switch (opt) { - case 'n': { - opts.print_newline = false; - break; - } - case 'e': { - opts.interpret_special_chars = true; - break; - } - case 's': { - opts.print_spaces = false; - break; - } - case 'E': { - opts.interpret_special_chars = false; - break; - } - case ':': { - builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - case '?': { - opts = oldopts; - *optind = w.woptind - 1; - return STATUS_CMD_OK; - } - default: { - DIE("unexpected retval from wgetopt_long"); - } - } - - // Super cheesy: We keep an old copy of the option state around, - // so we can revert it in case we get an argument like - // "-n foo". - // We need to keep it one out-of-date so we can ignore the *last* option. 
- // (this might be an issue in wgetopt, but that's a whole other can of worms - // and really only occurs with our weird "put it back" option parsing) - if (w.woptind == oldoptind + 2) { - oldopts = opts; - oldoptind = w.woptind; - } - } - - *optind = w.woptind; - return STATUS_CMD_OK; -} - -/// Parse a numeric escape sequence in str, returning whether we succeeded. Also return the number -/// of characters consumed and the resulting value. Supported escape sequences: -/// -/// \0nnn: octal value, zero to three digits -/// \nnn: octal value, one to three digits -/// \xhh: hex value, one to two digits -static bool builtin_echo_parse_numeric_sequence(const wchar_t *str, size_t *consumed, - unsigned char *out_val) { - bool success = false; - unsigned int start = 0; // the first character of the numeric part of the sequence - - unsigned int base = 0, max_digits = 0; - if (convert_digit(str[0], 8) != -1) { - // Octal escape - base = 8; - - // If the first digit is a 0, we allow four digits (including that zero); otherwise, we - // allow 3. - max_digits = (str[0] == L'0' ? 4 : 3); - } else if (str[0] == L'x') { - // Hex escape - base = 16; - max_digits = 2; - - // Skip the x - start = 1; - } - - if (base == 0) { - return success; - } - - unsigned int idx; - unsigned char val = 0; // resulting character - for (idx = start; idx < start + max_digits; idx++) { - int digit = convert_digit(str[idx], base); - if (digit == -1) break; - val = val * base + digit; - } - - // We succeeded if we consumed at least one digit. - if (idx > start) { - *consumed = idx; - *out_val = val; - success = true; - } - return success; -} - -/// The echo builtin. -/// -/// Bash only respects -n if it's the first argument. We'll do the same. We also support a new, -/// fish specific, option -s to mean "no spaces". 
-maybe_t builtin_echo(parser_t &parser, io_streams_t &streams, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - UNUSED(cmd); - int argc = builtin_count_args(argv); - echo_cmd_opts_t opts; - int optind; - int retval = parse_cmd_opts(opts, &optind, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - // The special character \c can be used to indicate no more output. - bool continue_output = true; - - const wchar_t *const *args_to_echo = argv + optind; - // We buffer output so we can write in one go, - // this matters when writing to an fd. - wcstring out; - for (size_t idx = 0; continue_output && args_to_echo[idx] != nullptr; idx++) { - if (opts.print_spaces && idx > 0) { - out.push_back(' '); - } - - const wchar_t *str = args_to_echo[idx]; - for (size_t j = 0; continue_output && str[j]; j++) { - if (!opts.interpret_special_chars || str[j] != L'\\') { - // Not an escape. - out.push_back(str[j]); - } else { - // Most escapes consume one character in addition to the backslash; the numeric - // sequences may consume more, while an unrecognized escape sequence consumes none. - wchar_t wc; - size_t consumed = 1; - switch (str[j + 1]) { - case L'a': { - wc = L'\a'; - break; - } - case L'b': { - wc = L'\b'; - break; - } - case L'e': { - wc = L'\x1B'; - break; - } - case L'f': { - wc = L'\f'; - break; - } - case L'n': { - wc = L'\n'; - break; - } - case L'r': { - wc = L'\r'; - break; - } - case L't': { - wc = L'\t'; - break; - } - case L'v': { - wc = L'\v'; - break; - } - case L'\\': { - wc = L'\\'; - break; - } - case L'c': { - wc = 0; - continue_output = false; - break; - } - default: { - // Octal and hex escape sequences. - unsigned char narrow_val = 0; - if (builtin_echo_parse_numeric_sequence(str + j + 1, &consumed, - &narrow_val)) { - // Here consumed must have been set to something. The narrow_val is a - // literal byte that we want to output (#1894). 
- wc = ENCODE_DIRECT_BASE + narrow_val % 256; - } else { - // Not a recognized escape. We consume only the backslash. - wc = L'\\'; - consumed = 0; - } - break; - } - } - - // Skip over characters that were part of this escape sequence (but not the - // backslash, which will be handled by the loop increment. - j += consumed; - - if (continue_output) { - out.push_back(wc); - } - } - } - } - if (opts.print_newline && continue_output) { - out.push_back('\n'); - } - - if (!out.empty()) { - streams.out.append(out); - } - - return STATUS_CMD_OK; -} diff --git a/src/builtins/echo.h b/src/builtins/echo.h deleted file mode 100644 index ed4ae2d13..000000000 --- a/src/builtins/echo.h +++ /dev/null @@ -1,11 +0,0 @@ -// Prototypes for executing builtin_echo function. -#ifndef FISH_BUILTIN_ECHO_H -#define FISH_BUILTIN_ECHO_H - -#include "../maybe.h" - -class parser_t; -struct io_streams_t; - -maybe_t builtin_echo(parser_t &parser, io_streams_t &streams, const wchar_t **argv); -#endif diff --git a/src/builtins/emit.cpp b/src/builtins/emit.cpp deleted file mode 100644 index b28adb51a..000000000 --- a/src/builtins/emit.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// Implementation of the emit builtin. -#include "config.h" // IWYU pragma: keep - -#include "emit.h" - -#include - -#include "../builtin.h" -#include "../common.h" -#include "../event.h" -#include "../fallback.h" // IWYU pragma: keep -#include "../io.h" -#include "../maybe.h" -#include "../wutil.h" // IWYU pragma: keep - -/// Implementation of the builtin emit command, used to create events. 
-maybe_t builtin_emit(parser_t &parser, io_streams_t &streams, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - int argc = builtin_count_args(argv); - help_only_cmd_opts_t opts; - - int optind; - int retval = parse_help_only_cmd_opts(opts, &optind, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - if (opts.print_help) { - builtin_print_help(parser, streams, cmd); - return STATUS_CMD_OK; - } - - if (!argv[optind]) { - streams.err.append_format(L"%ls: expected event name\n", cmd); - return STATUS_INVALID_ARGS; - } - - const wchar_t *eventname = argv[optind]; - wcstring_list_t args(argv + optind + 1, argv + argc); - event_fire_generic(parser, eventname, std::move(args)); - return STATUS_CMD_OK; -} diff --git a/src/builtins/emit.h b/src/builtins/emit.h deleted file mode 100644 index b5a21c6dd..000000000 --- a/src/builtins/emit.h +++ /dev/null @@ -1,11 +0,0 @@ -// Prototypes for executing builtin_emit function. -#ifndef FISH_BUILTIN_EMIT_H -#define FISH_BUILTIN_EMIT_H - -#include "../maybe.h" - -class parser_t; -struct io_streams_t; - -maybe_t builtin_emit(parser_t &parser, io_streams_t &streams, const wchar_t **argv); -#endif diff --git a/src/builtins/exit.cpp b/src/builtins/exit.cpp deleted file mode 100644 index 47687a644..000000000 --- a/src/builtins/exit.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// Implementation of the exit builtin. 
-#include "config.h" // IWYU pragma: keep - -#include "exit.h" - -#include - -#include "../builtin.h" -#include "../common.h" -#include "../fallback.h" // IWYU pragma: keep -#include "../io.h" -#include "../maybe.h" -#include "../parser.h" -#include "../wgetopt.h" -#include "../wutil.h" // IWYU pragma: keep - -struct exit_cmd_opts_t { - bool print_help = false; -}; -static const wchar_t *const short_options = L":h"; -static const struct woption long_options[] = {{L"help", no_argument, 'h'}, {}}; - -static int parse_cmd_opts(exit_cmd_opts_t &opts, int *optind, //!OCLINT(high ncss method) - int argc, const wchar_t **argv, parser_t &parser, io_streams_t &streams) { - UNUSED(parser); - UNUSED(streams); - const wchar_t *cmd = argv[0]; - int opt; - wgetopter_t w; - while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) { - switch (opt) { //!OCLINT(too few branches) - case 'h': { - opts.print_help = true; - break; - } - case ':': { - builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - case '?': { - // We would normally invoke builtin_unknown_option() and return an error. - // But for this command we want to let it try and parse the value as a negative - // return value. - *optind = w.woptind - 1; - return STATUS_CMD_OK; - } - default: { - DIE("unexpected retval from wgetopt_long"); - } - } - } - - *optind = w.woptind; - return STATUS_CMD_OK; -} - -/// The exit builtin. Calls reader_exit to exit and returns the value specified. 
-maybe_t builtin_exit(parser_t &parser, io_streams_t &streams, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - int argc = builtin_count_args(argv); - exit_cmd_opts_t opts; - - int optind; - int retval = parse_cmd_opts(opts, &optind, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - if (opts.print_help) { - builtin_print_help(parser, streams, cmd); - return STATUS_CMD_OK; - } - - if (optind + 1 < argc) { - streams.err.append_format(BUILTIN_ERR_TOO_MANY_ARGUMENTS, cmd); - builtin_print_error_trailer(parser, streams.err, cmd); - return STATUS_INVALID_ARGS; - } - - if (optind == argc) { - retval = parser.get_last_status(); - } else { - retval = fish_wcstoi(argv[optind]); - if (errno) { - streams.err.append_format(BUILTIN_ERR_NOT_NUMBER, cmd, argv[optind]); - builtin_print_error_trailer(parser, streams.err, cmd); - return STATUS_INVALID_ARGS; - } - } - // Mark that we are exiting in the parser. - // TODO: in concurrent mode this won't successfully exit a pipeline, as there are other parsers - // involved. That is, `exit | sleep 1000` may not exit as hoped. Need to rationalize what - // behavior we want here. - parser.libdata().exit_current_script = true; - return retval; -} diff --git a/src/builtins/exit.h b/src/builtins/exit.h deleted file mode 100644 index cf6bbb6db..000000000 --- a/src/builtins/exit.h +++ /dev/null @@ -1,11 +0,0 @@ -// Prototypes for executing builtin_exit function. 
-#ifndef FISH_BUILTIN_EXIT_H -#define FISH_BUILTIN_EXIT_H - -#include "../maybe.h" - -class parser_t; -struct io_streams_t; - -maybe_t builtin_exit(parser_t &parser, io_streams_t &streams, const wchar_t **argv); -#endif diff --git a/src/builtins/fg.cpp b/src/builtins/fg.cpp index f9a51e67d..73caca9f1 100644 --- a/src/builtins/fg.cpp +++ b/src/builtins/fg.cpp @@ -107,7 +107,7 @@ maybe_t builtin_fg(parser_t &parser, io_streams_t &streams, const wchar_t * std::fwprintf(stderr, FG_MSG, job->job_id(), job->command_wcstr()); } - wcstring ft = tok_command(job->command()); + wcstring ft = *tok_command(job->command()); if (!ft.empty()) { // Provide value for `status current-command` parser.libdata().status_vars.command = ft; diff --git a/src/builtins/function.cpp b/src/builtins/function.cpp index 54672da4e..1eca5b2ef 100644 --- a/src/builtins/function.cpp +++ b/src/builtins/function.cpp @@ -27,7 +27,7 @@ #include "../parser.h" #include "../parser_keywords.h" #include "../proc.h" -#include "../signal.h" +#include "../signals.h" #include "../wait_handle.h" #include "../wgetopt.h" #include "../wutil.h" // IWYU pragma: keep diff --git a/src/builtins/read.cpp b/src/builtins/read.cpp index 72b176af8..ba16d0aa2 100644 --- a/src/builtins/read.cpp +++ b/src/builtins/read.cpp @@ -425,7 +425,8 @@ static int validate_read_args(const wchar_t *cmd, read_cmd_opts_t &opts, int arg return STATUS_INVALID_ARGS; } if (env_var_t::flags_for(argv[i]) & env_var_t::flag_read_only) { - streams.err.append_format(_(L"%ls: %ls: cannot overwrite read-only variable"), cmd, argv[i]); + streams.err.append_format(_(L"%ls: %ls: cannot overwrite read-only variable"), cmd, + argv[i]); builtin_print_error_trailer(parser, streams.err, cmd); return STATUS_INVALID_ARGS; } @@ -529,13 +530,13 @@ maybe_t builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t } if (opts.tokenize) { - tokenizer_t tok{buff.c_str(), TOK_ACCEPT_UNFINISHED}; + auto tok = new_tokenizer(buff.c_str(), TOK_ACCEPT_UNFINISHED); 
wcstring out; if (opts.array) { // Array mode: assign each token as a separate element of the sole var. wcstring_list_t tokens; - while (auto t = tok.next()) { - auto text = tok.text_of(*t); + while (auto t = tok->next()) { + auto text = *tok->text_of(*t); if (unescape_string(text, &out, UNESCAPE_DEFAULT)) { tokens.push_back(out); } else { @@ -545,9 +546,9 @@ maybe_t builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t parser.set_var_and_fire(*var_ptr++, opts.place, std::move(tokens)); } else { - maybe_t t; - while ((vars_left() - 1 > 0) && (t = tok.next())) { - auto text = tok.text_of(*t); + std::unique_ptr t; + while ((vars_left() - 1 > 0) && (t = tok->next())) { + auto text = *tok->text_of(*t); if (unescape_string(text, &out, UNESCAPE_DEFAULT)) { parser.set_var_and_fire(*var_ptr++, opts.place, out); } else { @@ -556,7 +557,7 @@ maybe_t builtin_read(parser_t &parser, io_streams_t &streams, const wchar_t } // If we still have tokens, set the last variable to them. - if ((t = tok.next())) { + if ((t = tok->next())) { wcstring rest = wcstring(buff, t->offset); parser.set_var_and_fire(*var_ptr++, opts.place, std::move(rest)); } diff --git a/src/builtins/return.cpp b/src/builtins/return.cpp deleted file mode 100644 index 2289b72a7..000000000 --- a/src/builtins/return.cpp +++ /dev/null @@ -1,122 +0,0 @@ -// Implementation of the return builtin. 
-#include "config.h" // IWYU pragma: keep - -#include "return.h" - -#include -#include -#include -#include - -#include "../builtin.h" -#include "../common.h" -#include "../fallback.h" // IWYU pragma: keep -#include "../io.h" -#include "../maybe.h" -#include "../parser.h" -#include "../wgetopt.h" -#include "../wutil.h" // IWYU pragma: keep - -struct return_cmd_opts_t { - bool print_help = false; -}; -static const wchar_t *const short_options = L":h"; -static const struct woption long_options[] = {{L"help", no_argument, 'h'}, {}}; - -static int parse_cmd_opts(return_cmd_opts_t &opts, int *optind, //!OCLINT(high ncss method) - int argc, const wchar_t **argv, parser_t &parser, io_streams_t &streams) { - UNUSED(parser); - UNUSED(streams); - const wchar_t *cmd = argv[0]; - int opt; - wgetopter_t w; - while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) { - switch (opt) { //!OCLINT(too few branches) - case 'h': { - opts.print_help = true; - break; - } - case ':': { - builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - case '?': { - // We would normally invoke builtin_unknown_option() and return an error. - // But for this command we want to let it try and parse the value as a negative - // return value. - *optind = w.woptind - 1; - return STATUS_CMD_OK; - } - default: { - DIE("unexpected retval from wgetopt_long"); - } - } - } - - *optind = w.woptind; - return STATUS_CMD_OK; -} - -/// Function for handling the return builtin. 
-maybe_t builtin_return(parser_t &parser, io_streams_t &streams, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - int argc = builtin_count_args(argv); - return_cmd_opts_t opts; - - int optind; - int retval = parse_cmd_opts(opts, &optind, argc, argv, parser, streams); - if (retval != STATUS_CMD_OK) return retval; - - if (opts.print_help) { - builtin_print_help(parser, streams, cmd); - return STATUS_CMD_OK; - } - - if (optind + 1 < argc) { - streams.err.append_format(BUILTIN_ERR_TOO_MANY_ARGUMENTS, cmd); - builtin_print_error_trailer(parser, streams.err, cmd); - return STATUS_INVALID_ARGS; - } - - if (optind == argc) { - retval = parser.get_last_status(); - } else { - retval = fish_wcstoi(argv[1]); - if (errno) { - streams.err.append_format(BUILTIN_ERR_NOT_NUMBER, cmd, argv[1]); - builtin_print_error_trailer(parser, streams.err, cmd); - return STATUS_INVALID_ARGS; - } - } - - // Find the function block. - bool has_function_block = false; - for (const auto &b : parser.blocks()) { - if (b.is_function_call()) { - has_function_block = true; - break; - } - } - - // *nix does not support negative return values, but our `return` builtin happily accepts being - // called with negative literals (e.g. `return -1`). - // Map negative values to (256 - their absolute value). This prevents `return -1` from - // evaluating to a `$status` of 0 and keeps us from running into undefined behavior by trying to - // left shift a negative value in W_EXITCODE(). - if (retval < 0) { - retval = 256 - (std::abs(retval) % 256); - } - - // If we're not in a function, exit the current script (but not an interactive shell). - if (!has_function_block) { - if (!parser.libdata().is_interactive) { - parser.libdata().exit_current_script = true; - } - return retval; - } - - // Mark a return in the libdata. 
- parser.libdata().returning = true; - - return retval; -} diff --git a/src/builtins/return.h b/src/builtins/return.h deleted file mode 100644 index 243c56e1c..000000000 --- a/src/builtins/return.h +++ /dev/null @@ -1,11 +0,0 @@ -// Prototypes for executing builtin_return function. -#ifndef FISH_BUILTIN_RETURN_H -#define FISH_BUILTIN_RETURN_H - -#include "../maybe.h" - -class parser_t; -struct io_streams_t; - -maybe_t builtin_return(parser_t &parser, io_streams_t &streams, const wchar_t **argv); -#endif diff --git a/src/builtins/status.cpp b/src/builtins/status.cpp index dfc0c0639..23e1fa3aa 100644 --- a/src/builtins/status.cpp +++ b/src/builtins/status.cpp @@ -17,13 +17,13 @@ #include "../common.h" #include "../enum_map.h" #include "../fallback.h" // IWYU pragma: keep -#include "../future_feature_flags.h" #include "../io.h" #include "../maybe.h" #include "../parser.h" #include "../proc.h" #include "../wgetopt.h" #include "../wutil.h" // IWYU pragma: keep +#include "future_feature_flags.h" enum status_cmd_t { STATUS_CURRENT_CMD = 1, @@ -156,12 +156,12 @@ static bool set_status_cmd(const wchar_t *cmd, status_cmd_opts_t &opts, status_c /// Print the features and their values. static void print_features(io_streams_t &streams) { auto max_len = std::numeric_limits::min(); - for (const auto &md : features_t::metadata) - max_len = std::max(max_len, static_cast(wcslen(md.name))); - for (const auto &md : features_t::metadata) { + for (const auto &md : feature_metadata()) + max_len = std::max(max_len, static_cast(md.name->size())); + for (const auto &md : feature_metadata()) { int set = feature_test(md.flag); - streams.out.append_format(L"%-*ls%-3s %ls %ls\n", max_len + 1, md.name, set ? "on" : "off", - md.groups, md.description); + streams.out.append_format(L"%-*ls%-3s %ls %ls\n", max_len + 1, md.name->c_str(), + set ? 
"on" : "off", md.groups->c_str(), md.description->c_str()); } } @@ -365,11 +365,12 @@ maybe_t builtin_status(parser_t &parser, io_streams_t &streams, const wchar streams.err.append_format(BUILTIN_ERR_ARG_COUNT2, cmd, subcmd_str, 1, args.size()); return STATUS_INVALID_ARGS; } - auto metadata = features_t::metadata_for(args.front().c_str()); - if (!metadata) { - retval = TEST_FEATURE_NOT_RECOGNIZED; - } else { - retval = feature_test(metadata->flag) ? TEST_FEATURE_ON : TEST_FEATURE_OFF; + retval = TEST_FEATURE_NOT_RECOGNIZED; + for (const auto &md : feature_metadata()) { + if (*md.name == args.front()) { + retval = feature_test(md.flag) ? TEST_FEATURE_ON : TEST_FEATURE_OFF; + break; + } } break; } diff --git a/src/builtins/string.cpp b/src/builtins/string.cpp index 0b1cf8df1..424dd2afe 100644 --- a/src/builtins/string.cpp +++ b/src/builtins/string.cpp @@ -19,7 +19,6 @@ #include "../common.h" #include "../env.h" #include "../fallback.h" // IWYU pragma: keep -#include "../future_feature_flags.h" #include "../io.h" #include "../maybe.h" #include "../parse_util.h" @@ -30,6 +29,7 @@ #include "../wgetopt.h" #include "../wildcard.h" #include "../wutil.h" // IWYU pragma: keep +#include "future_feature_flags.h" // Empirically determined. 
// This is probably down to some pipe buffer or some such, @@ -1240,7 +1240,7 @@ class regex_replacer_t final : public string_replacer_t { regex_replacer_t(const wchar_t *argv0, re::regex_t regex, const wcstring &replacement_, const options_t &opts, io_streams_t &streams) : string_replacer_t(argv0, opts, streams), regex(std::move(regex)) { - if (feature_test(features_t::string_replace_backslash)) { + if (feature_test(feature_flag_t::string_replace_backslash)) { replacement = replacement_; } else { replacement = interpret_escapes(replacement_); diff --git a/src/builtins/wait.cpp b/src/builtins/wait.cpp deleted file mode 100644 index 4f9ad0898..000000000 --- a/src/builtins/wait.cpp +++ /dev/null @@ -1,202 +0,0 @@ -/// Functions for waiting for processes completed. -#include "config.h" // IWYU pragma: keep - -#include "wait.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../builtin.h" -#include "../common.h" -#include "../io.h" -#include "../maybe.h" -#include "../parser.h" -#include "../proc.h" -#include "../signal.h" -#include "../topic_monitor.h" -#include "../wait_handle.h" -#include "../wgetopt.h" -#include "../wutil.h" - -/// \return true if we can wait on a job. -static bool can_wait_on_job(const std::shared_ptr &j) { - return j->is_constructed() && !j->is_foreground() && !j->is_stopped(); -} - -/// \return true if a wait handle matches a pid or a process name. Exactly one should be passed. -/// For convenience, this returns false if the wait handle is null. -static bool wait_handle_matches(pid_t pid, const wchar_t *proc_name, const wait_handle_ref_t &wh) { - assert((pid > 0 || proc_name) && "Must specify either pid or proc_name"); - if (!wh) return false; - return (pid > 0 && pid == wh->pid) || (proc_name && proc_name == wh->base_name); -} - -/// Walk the list of jobs, looking for a process with \p pid (if nonzero) or \p proc_name (if not -/// null). Append all matching wait handles to \p handles. 
-/// \return true if we found a matching job (even if not waitable), false if not. -static bool find_wait_handles(pid_t pid, const wchar_t *proc_name, const parser_t &parser, - std::vector *handles) { - assert((pid > 0 || proc_name) && "Must specify either pid or proc_name"); - - // Has a job already completed? - // TODO: we can avoid traversing this list if searching by pid. - bool matched = false; - for (const auto &wh : parser.get_wait_handles().get_list()) { - if (wait_handle_matches(pid, proc_name, wh)) { - handles->push_back(wh); - matched = true; - } - } - - // Is there a running job match? - for (const auto &j : parser.jobs()) { - // We want to set 'matched' to true if we could have matched, even if the job was stopped. - bool provide_handle = can_wait_on_job(j); - for (const auto &proc : j->processes) { - auto wh = proc->make_wait_handle(j->internal_job_id); - if (wait_handle_matches(pid, proc_name, wh)) { - matched = true; - if (provide_handle) handles->push_back(std::move(wh)); - } - } - } - return matched; -} - -/// \return all wait handles for all jobs, current and already completed (!). -static std::vector get_all_wait_handles(const parser_t &parser) { - std::vector result; - // Get wait handles for reaped jobs. - const auto &whs = parser.get_wait_handles().get_list(); - result.insert(result.end(), whs.begin(), whs.end()); - - // Get wait handles for running jobs. - for (const auto &j : parser.jobs()) { - if (!can_wait_on_job(j)) continue; - for (const auto &proc : j->processes) { - if (auto wh = proc->make_wait_handle(j->internal_job_id)) { - result.push_back(std::move(wh)); - } - } - } - return result; -} - -static inline bool is_completed(const wait_handle_ref_t &wh) { return wh->completed; } - -/// Wait for the given wait handles to be marked as completed. -/// If \p any_flag is set, wait for the first one; otherwise wait for all. -/// \return a status code. 
-static int wait_for_completion(parser_t &parser, const std::vector &whs, - bool any_flag) { - if (whs.empty()) return 0; - - sigchecker_t sigint(topic_t::sighupint); - for (;;) { - if (any_flag ? std::any_of(whs.begin(), whs.end(), is_completed) - : std::all_of(whs.begin(), whs.end(), is_completed)) { - // Remove completed wait handles (at most 1 if any_flag is set). - for (const auto &wh : whs) { - if (is_completed(wh)) { - parser.get_wait_handles().remove(wh); - if (any_flag) break; - } - } - return 0; - } - if (sigint.check()) { - return 128 + SIGINT; - } - proc_wait_any(parser); - } - DIE("Unreachable"); -} - -/// Tests if all characters in the wide string are numeric. -static bool iswnumeric(const wchar_t *n) { - for (; *n; n++) { - if (*n < L'0' || *n > L'9') { - return false; - } - } - return true; -} - -maybe_t builtin_wait(parser_t &parser, io_streams_t &streams, const wchar_t **argv) { - const wchar_t *cmd = argv[0]; - int argc = builtin_count_args(argv); - bool any_flag = false; // flag for -n option - bool print_help = false; - - static const wchar_t *const short_options = L":nh"; - static const struct woption long_options[] = { - {L"any", no_argument, 'n'}, {L"help", no_argument, 'h'}, {}}; - - int opt; - wgetopter_t w; - while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) { - switch (opt) { - case 'n': - any_flag = true; - break; - case 'h': - print_help = true; - break; - case ':': { - builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - case '?': { - builtin_unknown_option(parser, streams, cmd, argv[w.woptind - 1]); - return STATUS_INVALID_ARGS; - } - default: { - DIE("unexpected retval from wgetopt_long"); - } - } - } - - if (print_help) { - builtin_print_help(parser, streams, cmd); - return STATUS_CMD_OK; - } - - if (w.woptind == argc) { - // No jobs specified. - // Note this may succeed with an empty wait list. 
- return wait_for_completion(parser, get_all_wait_handles(parser), any_flag); - } - - // Get the list of wait handles for our waiting. - std::vector wait_handles; - for (int i = w.woptind; i < argc; i++) { - if (iswnumeric(argv[i])) { - // argument is pid - pid_t pid = fish_wcstoi(argv[i]); - if (errno || pid <= 0) { - streams.err.append_format(_(L"%ls: '%ls' is not a valid process id\n"), cmd, - argv[i]); - continue; - } - if (!find_wait_handles(pid, nullptr, parser, &wait_handles)) { - streams.err.append_format(_(L"%ls: Could not find a job with process id '%d'\n"), - cmd, pid); - } - } else { - // argument is process name - if (!find_wait_handles(0, argv[i], parser, &wait_handles)) { - streams.err.append_format( - _(L"%ls: Could not find child processes with the name '%ls'\n"), cmd, argv[i]); - } - } - } - if (wait_handles.empty()) return STATUS_INVALID_ARGS; - return wait_for_completion(parser, wait_handles, any_flag); -} diff --git a/src/builtins/wait.h b/src/builtins/wait.h deleted file mode 100644 index 2bc0a0bcd..000000000 --- a/src/builtins/wait.h +++ /dev/null @@ -1,11 +0,0 @@ -// Prototypes for executing builtin_wait function. 
-#ifndef FISH_BUILTIN_WAIT_H -#define FISH_BUILTIN_WAIT_H - -#include "../maybe.h" - -class parser_t; -struct io_streams_t; - -maybe_t builtin_wait(parser_t &parser, io_streams_t &streams, const wchar_t **argv); -#endif diff --git a/src/common.cpp b/src/common.cpp index a1cc7c63c..25c9e4940 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -39,7 +39,7 @@ #include "future_feature_flags.h" #include "global_safety.h" #include "iothread.h" -#include "signal.h" +#include "signals.h" #include "termsize.h" #include "topic_monitor.h" #include "wcstringutil.h" @@ -863,7 +863,7 @@ static void escape_string_script(const wchar_t *orig_in, size_t in_len, wcstring const bool escape_printables = !(flags & ESCAPE_NO_PRINTABLES); const bool no_quoted = static_cast(flags & ESCAPE_NO_QUOTED); const bool no_tilde = static_cast(flags & ESCAPE_NO_TILDE); - const bool no_qmark = feature_test(features_t::qmark_noglob); + const bool no_qmark = feature_test(feature_flag_t::qmark_noglob); const bool symbolic = static_cast(flags & ESCAPE_SYMBOLIC) && (MB_CUR_MAX > 1); assert((!symbolic || !escape_printables) && "symbolic implies escape-no-printables"); @@ -1061,9 +1061,7 @@ static wcstring escape_string_pcre2(const wcstring &in) { case L'-': case L']': out.push_back('\\'); - __fallthrough__ - default: - out.push_back(c); + __fallthrough__ default : out.push_back(c); } } @@ -1225,8 +1223,8 @@ maybe_t read_unquoted_escape(const wchar_t *input, wcstring *result, boo // that are valid on their own, which is true for UTF-8) byte_buff.push_back(static_cast(res)); result_char_or_none = none(); - if (input[in_pos] == L'\\' - && (input[in_pos + 1] == L'X' || input[in_pos + 1] == L'x')) { + if (input[in_pos] == L'\\' && + (input[in_pos + 1] == L'X' || input[in_pos + 1] == L'x')) { in_pos++; continue; } @@ -1403,7 +1401,7 @@ static bool unescape_string_internal(const wchar_t *const input, const size_t in break; } case L'?': { - if (unescape_special && !feature_test(features_t::qmark_noglob)) { + if 
(unescape_special && !feature_test(feature_flag_t::qmark_noglob)) { to_append_or_none = ANY_CHAR; } break; diff --git a/src/common.h b/src/common.h index 0ba47f3bc..c30ac2c0a 100644 --- a/src/common.h +++ b/src/common.h @@ -342,7 +342,7 @@ void format_ullong_safe(wchar_t buff[64], unsigned long long val); void narrow_string_safe(char buff[64], const wchar_t *s); /// Stored in blocks to reference the file which created the block. -using filename_ref_t = std::shared_ptr; +using filename_ref_t = std::shared_ptr; using scoped_lock = std::lock_guard; @@ -446,15 +446,16 @@ wcstring vformat_string(const wchar_t *format, va_list va_orig); void append_format(wcstring &str, const wchar_t *format, ...); void append_formatv(wcstring &target, const wchar_t *format, va_list va_orig); -#ifdef HAVE_STD__MAKE_UNIQUE -using std::make_unique; -#else +#ifndef HAVE_STD__MAKE_UNIQUE /// make_unique implementation +namespace std { template std::unique_ptr make_unique(Args &&...args) { return std::unique_ptr(new T(std::forward(args)...)); } +} // namespace std #endif +using std::make_unique; /// This functions returns the end of the quoted substring beginning at \c pos. Returns 0 on error. /// diff --git a/src/complete.cpp b/src/complete.cpp index 9fdb57dc6..c98e40acf 100644 --- a/src/complete.cpp +++ b/src/complete.cpp @@ -864,7 +864,7 @@ bool completer_t::complete_param_for_command(const wcstring &cmd_orig, const wcs if (wildcard_match(match, key.first)) { // Copy all of their options into our list. Oof, this is a lot of copying. // We have to copy them in reverse order to preserve legacy behavior (#9221). 
- const auto& options = kv.second.get_options(); + const auto &options = kv.second.get_options(); all_options.emplace_back(options.rbegin(), options.rend()); } } @@ -894,7 +894,8 @@ bool completer_t::complete_param_for_command(const wcstring &cmd_orig, const wcs if (this->conditions_test(o.conditions)) { if (o.type == option_type_short) { - // Only override a true last_option_requires_param value with a false one + // Only override a true last_option_requires_param value with a false + // one if (last_option_requires_param.has_value()) { last_option_requires_param = *last_option_requires_param && o.result_mode.requires_param; @@ -1325,8 +1326,8 @@ cleanup_t completer_t::apply_var_assignments(const wcstring_list_t &var_assignme const expand_flags_t expand_flags = expand_flag::skip_cmdsubst; const block_t *block = ctx.parser->push_block(block_t::variable_assignment_block()); for (const wcstring &var_assign : var_assignments) { - maybe_t equals_pos = variable_assignment_equals_pos(var_assign); - assert(equals_pos.has_value() && "All variable assignments should have equals position"); + auto equals_pos = variable_assignment_equals_pos(var_assign); + assert(equals_pos && "All variable assignments should have equals position"); const wcstring variable_name = var_assign.substr(0, *equals_pos); const wcstring expression = var_assign.substr(*equals_pos + 1); @@ -1409,11 +1410,11 @@ void completer_t::walk_wrap_chain(const wcstring &cmd, const wcstring &cmdline, // Separate the wrap target into any variable assignments VAR=... and the command itself. 
wcstring wrapped_command; - tokenizer_t tokenizer(wt.c_str(), 0); + auto tokenizer = new_tokenizer(wt.c_str(), 0); size_t wrapped_command_offset_in_wt = wcstring::npos; - while (auto tok = tokenizer.next()) { - wcstring tok_src = tok->get_source(wt); - if (variable_assignment_equals_pos(tok_src).has_value()) { + while (auto tok = tokenizer->next()) { + wcstring tok_src = *tok->get_source(wt); + if (variable_assignment_equals_pos(tok_src)) { ad->var_assignments->push_back(std::move(tok_src)); } else { wrapped_command_offset_in_wt = tok->offset; @@ -1492,7 +1493,7 @@ void completer_t::mark_completions_duplicating_arguments(const wcstring &cmd, // Get all the arguments, unescaped, into an array that we're going to bsearch. wcstring_list_t arg_strs; for (const auto &arg : args) { - wcstring argstr = arg.get_source(cmd); + wcstring argstr = *arg.get_source(cmd); wcstring argstr_unesc; if (unescape_string(argstr, &argstr_unesc, UNESCAPE_DEFAULT)) { arg_strs.push_back(std::move(argstr_unesc)); @@ -1549,7 +1550,7 @@ void completer_t::perform_for_commandline(wcstring cmdline) { tokens.erase( std::remove_if(tokens.begin(), tokens.end(), [&cmdline](const tok_t &token) { - return parser_keywords_is_subcommand(token.get_source(cmdline)); + return parser_keywords_is_subcommand(*token.get_source(cmdline)); }), tokens.end()); } @@ -1559,8 +1560,8 @@ void completer_t::perform_for_commandline(wcstring cmdline) { wcstring_list_t var_assignments; for (const tok_t &tok : tokens) { if (tok.location_in_or_at_end_of_source_range(cursor_pos)) break; - wcstring tok_src = tok.get_source(cmdline); - if (!variable_assignment_equals_pos(tok_src).has_value()) break; + wcstring tok_src = *tok.get_source(cmdline); + if (!variable_assignment_equals_pos(tok_src)) break; var_assignments.push_back(std::move(tok_src)); } tokens.erase(tokens.begin(), tokens.begin() + var_assignments.size()); @@ -1583,26 +1584,27 @@ void completer_t::perform_for_commandline(wcstring cmdline) { effective_cmdline = 
&effective_cmdline_buf; } - if (tokens.back().type == token_type_t::comment) { + if (tokens.back().type_ == token_type_t::comment) { return; } - tokens.erase(std::remove_if(tokens.begin(), tokens.end(), - [](const tok_t &tok) { return tok.type == token_type_t::comment; }), - tokens.end()); + tokens.erase( + std::remove_if(tokens.begin(), tokens.end(), + [](const tok_t &tok) { return tok.type_ == token_type_t::comment; }), + tokens.end()); assert(!tokens.empty()); const tok_t &cmd_tok = tokens.front(); const tok_t &cur_tok = tokens.back(); // Since fish does not currently support redirect in command position, we return here. - if (cmd_tok.type != token_type_t::string) return; - if (cur_tok.type == token_type_t::error) return; + if (cmd_tok.type_ != token_type_t::string) return; + if (cur_tok.type_ == token_type_t::error) return; for (const auto &tok : tokens) { // If there was an error, it was in the last token. - assert(tok.type == token_type_t::string || tok.type == token_type_t::redirect); + assert(tok.type_ == token_type_t::string || tok.type_ == token_type_t::redirect); } // If we are completing a variable name or a tilde expansion user name, we do that and // return. No need for any other completions. 
- const wcstring current_token = cur_tok.get_source(cmdline); + const wcstring current_token = *cur_tok.get_source(cmdline); if (cur_tok.location_in_or_at_end_of_source_range(cursor_pos)) { if (try_complete_variable(current_token) || try_complete_user(current_token)) { return; @@ -1610,8 +1612,8 @@ void completer_t::perform_for_commandline(wcstring cmdline) { } if (cmd_tok.location_in_or_at_end_of_source_range(cursor_pos)) { - maybe_t equal_sign_pos = variable_assignment_equals_pos(current_token); - if (equal_sign_pos.has_value()) { + auto equal_sign_pos = variable_assignment_equals_pos(current_token); + if (equal_sign_pos) { complete_param_expand(current_token, true /* do_file */); return; } @@ -1621,11 +1623,11 @@ void completer_t::perform_for_commandline(wcstring cmdline) { return; } // See whether we are in an argument, in a redirection or in the whitespace in between. - bool in_redirection = cur_tok.type == token_type_t::redirect; + bool in_redirection = cur_tok.type_ == token_type_t::redirect; bool had_ddash = false; wcstring current_argument, previous_argument; - if (cur_tok.type == token_type_t::string && + if (cur_tok.type_ == token_type_t::string && cur_tok.location_in_or_at_end_of_source_range(position_in_statement)) { // If the cursor is in whitespace, then the "current" argument is empty and the // previous argument is the matching one. But if the cursor was in or at the end @@ -1639,15 +1641,15 @@ void completer_t::perform_for_commandline(wcstring cmdline) { current_argument = current_token; if (tokens.size() >= 2) { tok_t prev_tok = tokens.at(tokens.size() - 2); - if (prev_tok.type == token_type_t::string) - previous_argument = prev_tok.get_source(cmdline); - in_redirection = prev_tok.type == token_type_t::redirect; + if (prev_tok.type_ == token_type_t::string) + previous_argument = *prev_tok.get_source(cmdline); + in_redirection = prev_tok.type_ == token_type_t::redirect; } } // Check to see if we have a preceding double-dash. 
for (size_t i = 0; i < tokens.size() - 1; i++) { - if (tokens.at(i).get_source(cmdline) == L"--") { + if (*tokens.at(i).get_source(cmdline) == L"--") { had_ddash = true; break; } @@ -1665,7 +1667,7 @@ void completer_t::perform_for_commandline(wcstring cmdline) { source_offset_t bias = cmdline.size() - effective_cmdline->size(); source_range_t command_range = {cmd_tok.offset - bias, cmd_tok.length}; - wcstring exp_command = cmd_tok.get_source(cmdline); + wcstring exp_command = *cmd_tok.get_source(cmdline); bool unescaped = expand_command_token(ctx, exp_command) && unescape_string(previous_argument, &arg_data.previous_argument, UNESCAPE_DEFAULT) && diff --git a/src/env_universal_common.cpp b/src/env_universal_common.cpp index 4bdd1c372..db50120b5 100644 --- a/src/env_universal_common.cpp +++ b/src/env_universal_common.cpp @@ -37,6 +37,7 @@ #include "env.h" #include "env_universal_common.h" #include "fallback.h" // IWYU pragma: keep +#include "fd_readable_set.rs.h" #include "flog.h" #include "path.h" #include "utf8.h" @@ -1335,7 +1336,7 @@ class universal_notifier_named_pipe_t final : public universal_notifier_t { // If we're no longer readable, go back to wait mode. // Conversely, if we have been readable too long, perhaps some fish died while its // written data was still on the pipe; drain some. - if (!fd_readable_set_t::poll_fd_readable(pipe_fd.fd())) { + if (!poll_fd_readable(pipe_fd.fd())) { set_state(waiting_for_readable); } else if (get_time() >= state_start_usec + k_readable_too_long_duration_usec) { drain_excess(); @@ -1355,7 +1356,7 @@ class universal_notifier_named_pipe_t final : public universal_notifier_t { // change occurred with ours. 
if (get_time() >= state_start_usec + k_flash_duration_usec) { drain_written(); - if (!fd_readable_set_t::poll_fd_readable(pipe_fd.fd())) { + if (!poll_fd_readable(pipe_fd.fd())) { set_state(waiting_for_readable); } else { set_state(polling_during_readable); diff --git a/src/event.cpp b/src/event.cpp index f0465a104..4feca0a63 100644 --- a/src/event.cpp +++ b/src/event.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include "common.h" #include "fallback.h" // IWYU pragma: keep @@ -20,7 +21,7 @@ #include "maybe.h" #include "parser.h" #include "proc.h" -#include "signal.h" +#include "signals.h" #include "termsize.h" #include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep @@ -488,6 +489,14 @@ void event_print(io_streams_t &streams, const wcstring &type_filter) { } } +void event_fire_generic(parser_t &parser, wcstring name, const wcharz_t *argv, int argc) { + wcstring_list_t args_vec{}; + for (int i = 0; i < argc; i++) { + args_vec.push_back(argv[i]); + } + event_fire_generic(parser, std::move(name), std::move(args_vec)); +} + void event_fire_generic(parser_t &parser, wcstring name, wcstring_list_t args) { event_t ev(event_type_t::generic); ev.desc.str_param1 = std::move(name); diff --git a/src/event.h b/src/event.h index bbdf7bd30..c7b2380c2 100644 --- a/src/event.h +++ b/src/event.h @@ -15,6 +15,7 @@ #include "common.h" #include "global_safety.h" +#include "wutil.h" struct io_streams_t; @@ -162,6 +163,9 @@ void event_print(io_streams_t &streams, const wcstring &type_filter); /// Returns a string describing the specified event. wcstring event_get_desc(const parser_t &parser, const event_t &e); +// FFI helper for event_fire_generic +void event_fire_generic(parser_t &parser, wcstring name, const wcharz_t *argv, int argc); + /// Fire a generic event with the specified name. 
void event_fire_generic(parser_t &parser, wcstring name, wcstring_list_t args = {}); diff --git a/src/exec.cpp b/src/exec.cpp index 7fae21c3d..daa942922 100644 --- a/src/exec.cpp +++ b/src/exec.cpp @@ -47,7 +47,7 @@ #include "proc.h" #include "reader.h" #include "redirection.h" -#include "timer.h" +#include "timer.rs.h" #include "trace.h" #include "wait_handle.h" #include "wcstringutil.h" @@ -240,7 +240,7 @@ static void internal_exec(env_stack_t &vars, job_t *j, const io_chain_t &block_i } // child_setup_process makes sure signals are properly set up. - dup2_list_t redirs = dup2_list_t::resolve_chain(all_ios); + dup2_list_t redirs = dup2_list_resolve_chain_shim(all_ios); if (child_setup_process(false /* not claim_tty */, *j, false /* not is_forked */, redirs) == 0) { // Decrement SHLVL as we're removing ourselves from the shell "stack". @@ -306,7 +306,7 @@ static void run_internal_process(process_t *p, std::string &&outdata, std::strin // Note it's important we do this even if we have no out or err data, because we may have been // asked to truncate a file (e.g. `echo -n '' > /tmp/truncateme.txt'). The open() in the dup2 // list resolution will ensure this happens. - f->dup2s = dup2_list_t::resolve_chain(ios); + f->dup2s = dup2_list_resolve_chain_shim(ios); // Figure out which source fds to write to. If they are closed (unlikely) we just exit // successfully. @@ -514,7 +514,7 @@ static launch_result_t exec_external_command(parser_t &parser, const std::shared null_terminated_array_t argv_array(narrow_argv); // Convert our IO chain to a dup2 sequence. - auto dup2s = dup2_list_t::resolve_chain(proc_io_chain); + auto dup2s = dup2_list_resolve_chain_shim(proc_io_chain); // Ensure that stdin is blocking before we hand it off (see issue #176). // Note this will also affect stdout and stderr if they refer to the same tty. @@ -717,8 +717,9 @@ static proc_performer_t get_performer_for_builtin( } else { // We are not a pipe. 
Check if there is a redirection local to the process // that's not io_mode_t::close. - for (const auto &redir : p->redirection_specs()) { - if (redir.fd == STDIN_FILENO && !redir.is_close()) { + for (size_t i = 0; i < p->redirection_specs().size(); i++) { + const auto *redir = p->redirection_specs().at(i); + if (redir->fd() == STDIN_FILENO && !redir->is_close()) { stdin_is_directly_redirected = true; break; } @@ -1018,7 +1019,7 @@ bool exec_job(parser_t &parser, const shared_ptr &j, const io_chain_t &bl } return false; } - cleanup_t timer = push_timer(j->wants_timing() && !no_exec()); + auto timer = push_timer(j->wants_timing() && !no_exec()); // Get the deferred process, if any. We will have to remember its pipes. autoclose_pipes_t deferred_pipes; diff --git a/src/expand.cpp b/src/expand.cpp index e04077d2b..6dc045c60 100644 --- a/src/expand.cpp +++ b/src/expand.cpp @@ -71,14 +71,14 @@ static void append_syntax_error(parse_error_list_t *errors, size_t source_start, parse_error_t error; error.source_start = source_start; error.source_length = 0; - error.code = parse_error_syntax; + error.code = parse_error_code_t::syntax; va_list va; va_start(va, fmt); - error.text = vformat_string(fmt, va); + error.text = std::make_unique(vformat_string(fmt, va)); va_end(va); - errors->push_back(error); + errors->push_back(std::move(error)); } /// Append a cmdsub error to the given error list. 
But only do so if the error hasn't already been @@ -91,18 +91,18 @@ static void append_cmdsub_error(parse_error_list_t *errors, size_t source_start, parse_error_t error; error.source_start = source_start; error.source_length = source_end - source_start + 1; - error.code = parse_error_cmdsubst; + error.code = parse_error_code_t::cmdsubst; va_list va; va_start(va, fmt); - error.text = vformat_string(fmt, va); + error.text = std::make_unique(vformat_string(fmt, va)); va_end(va); - for (const auto &it : *errors) { - if (error.text == it.text) return; + for (size_t i = 0; i < errors->size(); i++) { + if (*error.text == *errors->at(i)->text()) return; } - errors->push_back(error); + errors->push_back(std::move(error)); } /// Append an overflow error, when expansion produces too much data. @@ -112,8 +112,8 @@ static expand_result_t append_overflow_error(parse_error_list_t *errors, parse_error_t error; error.source_start = source_start; error.source_length = 0; - error.code = parse_error_generic; - error.text = _(L"Expansion produced too many results"); + error.code = parse_error_code_t::generic; + error.text = std::make_unique(_(L"Expansion produced too many results")); errors->push_back(std::move(error)); } return expand_result_t::make_error(STATUS_EXPAND_ERROR); diff --git a/src/fd_monitor.cpp b/src/fd_monitor.cpp index 6d6934014..7f932c53f 100644 --- a/src/fd_monitor.cpp +++ b/src/fd_monitor.cpp @@ -116,7 +116,8 @@ bool fd_monitor_item_t::poke_item(const poke_list_t &pokelist) { void fd_monitor_t::run_in_background() { ASSERT_IS_BACKGROUND_THREAD(); poke_list_t pokelist; - fd_readable_set_t fds; + auto fds_box = new_fd_readable_set(); + auto &fds = *fds_box; for (;;) { // Poke any items that need it. 
if (!pokelist.empty()) { @@ -131,7 +132,7 @@ void fd_monitor_t::run_in_background() { fds.add(change_signal_fd); auto now = std::chrono::steady_clock::now(); - uint64_t timeout_usec = fd_monitor_item_t::kNoTimeout; + uint64_t timeout_usec = kNoTimeout; for (auto &item : items_) { fds.add(item.fd.fd()); @@ -145,8 +146,7 @@ void fd_monitor_t::run_in_background() { // We refer to this as the wait-lap. bool is_wait_lap = (items_.size() == 0); if (is_wait_lap) { - assert(timeout_usec == fd_monitor_item_t::kNoTimeout && - "Should not have a timeout on wait-lap"); + assert(timeout_usec == kNoTimeout && "Should not have a timeout on wait-lap"); timeout_usec = 256 * kUsecPerMsec; } diff --git a/src/fd_monitor.h b/src/fd_monitor.h index 6b4005a68..311606940 100644 --- a/src/fd_monitor.h +++ b/src/fd_monitor.h @@ -11,6 +11,7 @@ #include // IWYU pragma: keep #include "common.h" +#include "fd_readable_set.rs.h" #include "fds.h" #include "maybe.h" @@ -33,9 +34,6 @@ struct fd_monitor_item_t { /// The callback may close \p fd, in which case the item is removed. using callback_t = std::function; - /// A sentinel value meaning no timeout. - static constexpr uint64_t kNoTimeout = fd_readable_set_t::kNoTimeout; - /// The fd to monitor. autoclose_fd_t fd{}; diff --git a/src/fds.cpp b/src/fds.cpp index 0dbae2eb3..225b6b7b4 100644 --- a/src/fds.cpp +++ b/src/fds.cpp @@ -29,109 +29,6 @@ void autoclose_fd_t::close() { fd_ = -1; } -fd_readable_set_t::fd_readable_set_t() { clear(); } - -#if FISH_READABLE_SET_USE_POLL - -// Convert from a usec to a poll-friendly msec. -static int usec_to_poll_msec(uint64_t timeout_usec) { - uint64_t timeout_msec = timeout_usec / kUsecPerMsec; - // Round to nearest, down for halfway. - timeout_msec += ((timeout_usec % kUsecPerMsec) > kUsecPerMsec / 2) ? 1 : 0; - if (timeout_usec == fd_readable_set_t::kNoTimeout || - timeout_msec > std::numeric_limits::max()) { - // Negative values mean wait forever in poll-speak. 
- return -1; - } - return static_cast(timeout_msec); -} - -void fd_readable_set_t::clear() { pollfds_.clear(); } - -static inline bool pollfd_less_than(const pollfd &lhs, int rhs) { return lhs.fd < rhs; } - -void fd_readable_set_t::add(int fd) { - if (fd >= 0) { - auto where = std::lower_bound(pollfds_.begin(), pollfds_.end(), fd, pollfd_less_than); - if (where == pollfds_.end() || where->fd != fd) { - pollfds_.insert(where, pollfd{fd, POLLIN, 0}); - } - } -} - -bool fd_readable_set_t::test(int fd) const { - // If a pipe is widowed with no data, Linux sets POLLHUP but not POLLIN, so test for both. - auto where = std::lower_bound(pollfds_.begin(), pollfds_.end(), fd, pollfd_less_than); - return where != pollfds_.end() && where->fd == fd && (where->revents & (POLLIN | POLLHUP)); -} - -// static -int fd_readable_set_t::do_poll(struct pollfd *fds, size_t count, uint64_t timeout_usec) { - assert(count <= std::numeric_limits::max() && "count too big"); - return ::poll(fds, static_cast(count), usec_to_poll_msec(timeout_usec)); -} - -int fd_readable_set_t::check_readable(uint64_t timeout_usec) { - if (pollfds_.empty()) return 0; - return do_poll(&pollfds_[0], pollfds_.size(), timeout_usec); -} - -// static -bool fd_readable_set_t::is_fd_readable(int fd, uint64_t timeout_usec) { - if (fd < 0) return false; - struct pollfd pfd { - fd, POLLIN, 0 - }; - int ret = fd_readable_set_t::do_poll(&pfd, 1, timeout_usec); - return ret > 0 && (pfd.revents & POLLIN); -} - -#else -// Implementation based on select(). 
- -void fd_readable_set_t::clear() { - FD_ZERO(&fdset_); - nfds_ = 0; -} - -void fd_readable_set_t::add(int fd) { - if (fd >= FD_SETSIZE) { - FLOGF(error, "fd %d too large for select()", fd); - return; - } - if (fd >= 0) { - FD_SET(fd, &fdset_); - nfds_ = std::max(nfds_, fd + 1); - } -} - -bool fd_readable_set_t::test(int fd) const { return fd >= 0 && FD_ISSET(fd, &fdset_); } - -int fd_readable_set_t::check_readable(uint64_t timeout_usec) { - if (timeout_usec == kNoTimeout) { - return ::select(nfds_, &fdset_, nullptr, nullptr, nullptr); - } else { - struct timeval tvs; - tvs.tv_sec = timeout_usec / kUsecPerSec; - tvs.tv_usec = timeout_usec % kUsecPerSec; - return ::select(nfds_, &fdset_, nullptr, nullptr, &tvs); - } -} - -// static -bool fd_readable_set_t::is_fd_readable(int fd, uint64_t timeout_usec) { - if (fd < 0) return false; - fd_readable_set_t s; - s.add(fd); - int res = s.check_readable(timeout_usec); - return res > 0 && s.test(fd); -} - -#endif // not FISH_READABLE_SET_USE_POLL - -// static -bool fd_readable_set_t::poll_fd_readable(int fd) { return is_fd_readable(fd, 0); } - #ifdef HAVE_EVENTFD // Note we do not want to use EFD_SEMAPHORE because we are binary (not counting) semaphore. fd_event_signaller_t::fd_event_signaller_t() { @@ -284,6 +181,15 @@ maybe_t make_autoclose_pipes() { return autoclose_pipes_t(std::move(read_end), std::move(write_end)); } +pipes_ffi_t make_pipes_ffi() { + pipes_ffi_t res = {-1, -1}; + if (auto pipes = make_autoclose_pipes()) { + res.read = pipes->read.acquire(); + res.write = pipes->write.acquire(); + } + return res; +} + int set_cloexec(int fd, bool should_set) { // Note we don't want to overwrite existing flags like O_NONBLOCK which may be set. So fetch the // existing flags and modify them. diff --git a/src/fds.h b/src/fds.h index 0b315eb95..0f5b508ce 100644 --- a/src/fds.h +++ b/src/fds.h @@ -24,6 +24,9 @@ /// (like >&5). extern const int k_first_high_fd; +/// A sentinel value indicating no timeout. 
+#define kNoTimeout (std::numeric_limits::max()) + /// A helper class for managing and automatically closing a file descriptor. class autoclose_fd_t : noncopyable_t { int fd_; @@ -63,62 +66,6 @@ class autoclose_fd_t : noncopyable_t { ~autoclose_fd_t() { close(); } }; -// Resolve whether to use poll() or select(). -#ifndef FISH_READABLE_SET_USE_POLL -#ifdef __APPLE__ -// Apple's `man poll`: "The poll() system call currently does not support devices." -#define FISH_READABLE_SET_USE_POLL 0 -#else -// Use poll other places so we can support unlimited fds. -#define FISH_READABLE_SET_USE_POLL 1 -#endif -#endif - -/// A modest wrapper around select() or poll(), according to FISH_READABLE_SET_USE_POLL. -/// This allows accumulating a set of fds and then seeing if they are readable. -/// This only handles readability. -struct fd_readable_set_t { - /// Construct an empty set. - fd_readable_set_t(); - - /// Reset back to an empty set. - void clear(); - - /// Add an fd to the set. The fd is ignored if negative (for convenience). - void add(int fd); - - /// \return true if the given fd is marked as set, in our set. \returns false if negative. - bool test(int fd) const; - - /// Call select() or poll(), according to FISH_READABLE_SET_USE_POLL. Note this destructively - /// modifies the set. \return the result of select() or poll(). - int check_readable(uint64_t timeout_usec = fd_readable_set_t::kNoTimeout); - - /// Check if a single fd is readable, with a given timeout. - /// \return true if readable, false if not. - static bool is_fd_readable(int fd, uint64_t timeout_usec); - - /// Check if a single fd is readable, without blocking. - /// \return true if readable, false if not. - static bool poll_fd_readable(int fd); - - /// A special timeout value which may be passed to indicate no timeout. - static constexpr uint64_t kNoTimeout = std::numeric_limits::max(); - - private: -#if FISH_READABLE_SET_USE_POLL - // Our list of FDs, sorted by fd. 
- std::vector pollfds_{}; - - // Helper function. - static int do_poll(struct pollfd *fds, size_t count, uint64_t timeout_usec); -#else - // The underlying fdset and nfds value to pass to select(). - fd_set fdset_; - int nfds_{0}; -#endif -}; - /// Helper type returned from making autoclose pipes. struct autoclose_pipes_t { /// Read end of the pipe. @@ -137,6 +84,14 @@ struct autoclose_pipes_t { /// \return pipes on success, none() on error. maybe_t make_autoclose_pipes(); +/// Create pipes. +/// Upon failure both values will be negative. +struct pipes_ffi_t { + int read; + int write; +}; +pipes_ffi_t make_pipes_ffi(); + /// An event signaller implemented using a file descriptor, so it can plug into select(). /// This is like a binary semaphore. A call to post() will signal an event, making the fd readable. /// Multiple calls to post() may be coalesced. On Linux this uses eventfd(); on other systems this diff --git a/src/fish.cpp b/src/fish.cpp index 56dba892e..b602bd8e1 100644 --- a/src/fish.cpp +++ b/src/fish.cpp @@ -39,11 +39,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "ast.h" #include "common.h" +#include "cxxgen.h" #include "env.h" #include "event.h" #include "expand.h" #include "fallback.h" // IWYU pragma: keep #include "fds.h" +#include "ffi_init.rs.h" #include "fish_version.h" #include "flog.h" #include "function.h" @@ -59,7 +61,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "path.h" #include "proc.h" #include "reader.h" -#include "signal.h" +#include "signals.h" #include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep @@ -261,11 +263,11 @@ static int run_command_list(parser_t &parser, const std::vector &cm for (const auto &cmd : cmds) { wcstring cmd_wcs = str2wcstring(cmd); // Parse into an ast and detect errors. 
- parse_error_list_t errors; - auto ast = ast::ast_t::parse(cmd_wcs, parse_flag_none, &errors); + auto errors = new_parse_error_list(); + auto ast = ast::ast_t::parse(cmd_wcs, parse_flag_none, &*errors); bool errored = ast.errored(); if (!errored) { - errored = parse_util_detect_errors(ast, cmd_wcs, &errors); + errored = parse_util_detect_errors(ast, cmd_wcs, &*errors); } if (!errored) { // Construct a parsed source ref. @@ -275,7 +277,7 @@ static int run_command_list(parser_t &parser, const std::vector &cm parser.eval(ps, io); } else { wcstring sb; - parser.get_backtrace(cmd_wcs, errors, sb); + parser.get_backtrace(cmd_wcs, *errors, sb); std::fwprintf(stderr, L"%ls", sb.c_str()); } } @@ -319,6 +321,7 @@ static int fish_parse_opt(int argc, char **argv, fish_cmd_opts_t *opts) { } case 'd': { activate_flog_categories_by_pattern(str2wcstring(optarg)); + rust_activate_flog_categories_by_pattern(str2wcstring(optarg).c_str()); for (auto cat : get_flog_categories()) { if (cat->enabled) { std::fwprintf(stdout, L"Debug enabled for category: %ls\n", cat->name); @@ -427,6 +430,7 @@ int main(int argc, char **argv) { program_name = L"fish"; set_main_thread(); setup_fork_guards(); + rust_init(); signal_unblock_all(); setlocale(LC_ALL, ""); @@ -496,10 +500,10 @@ int main(int argc, char **argv) { // command line takes precedence). 
if (auto features_var = env_stack_t::globals().get(L"fish_features")) { for (const wcstring &s : features_var->as_list()) { - mutable_fish_features().set_from_string(s); + mutable_fish_features()->set_from_string(s.c_str()); } } - mutable_fish_features().set_from_string(opts.features); + mutable_fish_features()->set_from_string(opts.features.c_str()); proc_init(); misc_init(); reader_init(); diff --git a/src/fish_indent.cpp b/src/fish_indent.cpp index 142786608..a146efbf0 100644 --- a/src/fish_indent.cpp +++ b/src/fish_indent.cpp @@ -40,6 +40,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA #include "env.h" #include "expand.h" #include "fds.h" +#include "ffi_init.rs.h" #include "fish_version.h" #include "flog.h" #include "future_feature_flags.h" @@ -194,7 +195,7 @@ struct pretty_printer_t { p = p->parent; assert(p->type == type_t::statement); p = p->parent; - if (auto job = p->try_as()) { + if (auto job = p->try_as()) { if (!job->variables.empty()) result |= allow_escaped_newlines; } else if (auto job_cnt = p->try_as()) { if (!job_cnt->variables.empty()) result |= allow_escaped_newlines; @@ -419,9 +420,9 @@ struct pretty_printer_t { // always emit one. bool needs_nl = false; - tokenizer_t tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES); - while (maybe_t tok = tokenizer.next()) { - wcstring tok_text = tokenizer.text_of(*tok); + auto tokenizer = new_tokenizer(gap_text.c_str(), TOK_SHOW_COMMENTS | TOK_SHOW_BLANK_LINES); + while (auto tok = tokenizer->next()) { + wcstring tok_text = *tokenizer->text_of(*tok); if (needs_nl) { emit_newline(); @@ -433,11 +434,11 @@ struct pretty_printer_t { if (tok_text == L"\n") continue; } - if (tok->type == token_type_t::comment) { + if (tok->type_ == token_type_t::comment) { emit_space_or_indent(); output.append(tok_text); needs_nl = true; - } else if (tok->type == token_type_t::end) { + } else if (tok->type_ == token_type_t::end) { // This may be either a newline or semicolon. 
// Semicolons found here are not part of the ast and can simply be removed. // Newlines are preserved unless mask_newline is set. @@ -448,7 +449,7 @@ struct pretty_printer_t { fprintf(stderr, "Gap text should only have comments and newlines - instead found token " "type %d with text: %ls\n", - (int)tok->type, tok_text.c_str()); + (int)tok->type_, tok_text.c_str()); DIE("Gap text should only have comments and newlines"); } } @@ -873,6 +874,7 @@ int main(int argc, char *argv[]) { program_name = L"fish_indent"; set_main_thread(); setup_fork_guards(); + rust_init(); // Using the user's default locale could be a problem if it doesn't use UTF-8 encoding. That's // because the fish project assumes Unicode UTF-8 encoding in all of its scripts. // @@ -884,7 +886,7 @@ int main(int argc, char *argv[]) { if (auto features_var = env_stack_t::globals().get(L"fish_features")) { for (const wcstring &s : features_var->as_list()) { - mutable_fish_features().set_from_string(s); + mutable_fish_features()->set_from_string(s.c_str()); } } diff --git a/src/fish_key_reader.cpp b/src/fish_key_reader.cpp index db3c1ac93..1e1cb79ba 100644 --- a/src/fish_key_reader.cpp +++ b/src/fish_key_reader.cpp @@ -20,8 +20,10 @@ #include #include "common.h" +#include "cxxgen.h" #include "env.h" #include "fallback.h" // IWYU pragma: keep +#include "ffi_init.rs.h" #include "fish_version.h" #include "input.h" #include "input_common.h" @@ -30,7 +32,7 @@ #include "print_help.h" #include "proc.h" #include "reader.h" -#include "signal.h" +#include "signals.h" #include "wutil.h" // IWYU pragma: keep struct config_paths_t determine_config_directory_paths(const char *argv0); @@ -271,6 +273,7 @@ static void process_input(bool continuous_mode, bool verbose) { set_interactive_session(true); set_main_thread(); setup_fork_guards(); + rust_init(); env_init(); reader_init(); parser_t &parser = parser_t::principal_parser(); diff --git a/src/fish_test_helper.cpp b/src/fish_test_helper.cpp index 06689eeca..dbb7390f9 100644 
--- a/src/fish_test_helper.cpp +++ b/src/fish_test_helper.cpp @@ -2,6 +2,7 @@ // programs, allowing fish to test its behavior. #include +#include #include #include diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp index ac7a59187..2642541bb 100644 --- a/src/fish_tests.cpp +++ b/src/fish_tests.cpp @@ -53,13 +53,17 @@ #include "color.h" #include "common.h" #include "complete.h" +#include "cxxgen.h" #include "enum_set.h" #include "env.h" #include "env_universal_common.h" #include "expand.h" #include "fallback.h" // IWYU pragma: keep #include "fd_monitor.h" +#include "fd_readable_set.rs.h" #include "fds.h" +#include "ffi_init.rs.h" +#include "ffi_tests.rs.h" #include "function.h" #include "future_feature_flags.h" #include "global_safety.h" @@ -85,9 +89,9 @@ #include "reader.h" #include "redirection.h" #include "screen.h" -#include "signal.h" +#include "signals.h" +#include "smoke.rs.h" #include "termsize.h" -#include "timer.h" #include "tokenizer.h" #include "topic_monitor.h" #include "utf8.h" @@ -635,25 +639,25 @@ static void test_tokenizer() { say(L"Testing tokenizer"); { const wchar_t *str = L"alpha beta"; - tokenizer_t t(str, 0); - maybe_t token{}; + auto t = new_tokenizer(str, 0); + std::unique_ptr token{}; - token = t.next(); // alpha - do_test(token.has_value()); - do_test(token->type == token_type_t::string); + token = t->next(); // alpha + do_test(token); + do_test(token->type_ == token_type_t::string); do_test(token->offset == 0); do_test(token->length == 5); - do_test(t.text_of(*token) == L"alpha"); + do_test(*t->text_of(*token) == L"alpha"); - token = t.next(); // beta - do_test(token.has_value()); - do_test(token->type == token_type_t::string); + token = t->next(); // beta + do_test(token); + do_test(token->type_ == token_type_t::string); do_test(token->offset == 6); do_test(token->length == 4); - do_test(t.text_of(*token) == L"beta"); + do_test(*t->text_of(*token) == L"beta"); - token = t.next(); - do_test(!token.has_value()); + token = t->next(); + 
do_test(!token); } const wchar_t *str = @@ -673,21 +677,21 @@ static void test_tokenizer() { say(L"Test correct tokenization"); { - tokenizer_t t(str, 0); + auto t = new_tokenizer(str, 0); size_t i = 0; - while (auto token = t.next()) { + while (auto token = t->next()) { if (i >= sizeof types / sizeof *types) { err(L"Too many tokens returned from tokenizer"); - std::fwprintf(stdout, L"Got excess token type %ld\n", (long)token->type); + std::fwprintf(stdout, L"Got excess token type %ld\n", (long)token->type_); break; } - if (types[i] != token->type) { + if (types[i] != token->type_) { err(L"Tokenization error:"); std::fwprintf( stdout, L"Token number %zu of string \n'%ls'\n, expected type %ld, got token type " L"%ld\n", - i + 1, str, (long)types[i], (long)token->type); + i + 1, str, (long)types[i], (long)token->type_); } i++; } @@ -698,50 +702,50 @@ static void test_tokenizer() { // Test some errors. { - tokenizer_t t(L"abc\\", 0); - auto token = t.next(); - do_test(token.has_value()); - do_test(token->type == token_type_t::error); + auto t = new_tokenizer(L"abc\\", 0); + auto token = t->next(); + do_test(token); + do_test(token->type_ == token_type_t::error); do_test(token->error == tokenizer_error_t::unterminated_escape); do_test(token->error_offset_within_token == 3); } { - tokenizer_t t(L"abc )defg(hij", 0); - auto token = t.next(); - do_test(token.has_value()); - token = t.next(); - do_test(token.has_value()); - do_test(token->type == token_type_t::error); + auto t = new_tokenizer(L"abc )defg(hij", 0); + auto token = t->next(); + do_test(token); + token = t->next(); + do_test(token); + do_test(token->type_ == token_type_t::error); do_test(token->error == tokenizer_error_t::closing_unopened_subshell); do_test(token->offset == 4); do_test(token->error_offset_within_token == 0); } { - tokenizer_t t(L"abc defg(hij (klm)", 0); - auto token = t.next(); - do_test(token.has_value()); - token = t.next(); - do_test(token.has_value()); - do_test(token->type == 
token_type_t::error); + auto t = new_tokenizer(L"abc defg(hij (klm)", 0); + auto token = t->next(); + do_test(token); + token = t->next(); + do_test(token); + do_test(token->type_ == token_type_t::error); do_test(token->error == tokenizer_error_t::unterminated_subshell); do_test(token->error_offset_within_token == 4); } { - tokenizer_t t(L"abc defg[hij (klm)", 0); - auto token = t.next(); - do_test(token.has_value()); - token = t.next(); - do_test(token.has_value()); - do_test(token->type == token_type_t::error); + auto t = new_tokenizer(L"abc defg[hij (klm)", 0); + auto token = t->next(); + do_test(token); + token = t->next(); + do_test(token); + do_test(token->type_ == token_type_t::error); do_test(token->error == tokenizer_error_t::unterminated_slice); do_test(token->error_offset_within_token == 4); } // Test some redirection parsing. - auto pipe_or_redir = [](const wchar_t *s) { return pipe_or_redir_t::from_string(s); }; + auto pipe_or_redir = [](const wchar_t *s) { return pipe_or_redir_from_string(s); }; do_test(pipe_or_redir(L"|")->is_pipe); do_test(pipe_or_redir(L"0>|")->is_pipe); do_test(pipe_or_redir(L"0>|")->fd == 0); @@ -765,7 +769,7 @@ static void test_tokenizer() { do_test(pipe_or_redir(L"&>?")->stderr_merge); auto get_redir_mode = [](const wchar_t *s) -> maybe_t { - if (auto redir = pipe_or_redir_t::from_string(s)) { + if (auto redir = pipe_or_redir_from_string(s)) { return redir->mode; } return none(); @@ -844,7 +848,7 @@ static void test_fd_monitor() { constexpr uint64_t usec_per_msec = 1000; // Items which will never receive data or be called back. - item_maker_t item_never(fd_monitor_item_t::kNoTimeout); + item_maker_t item_never(kNoTimeout); item_maker_t item_hugetimeout(100000000LLU * usec_per_msec); // Item which should get no data, and time out. @@ -854,13 +858,13 @@ static void test_fd_monitor() { item_maker_t item42_timeout(16 * usec_per_msec); // Item which should get exactly 42 bytes, and not time out. 
- item_maker_t item42_nottimeout(fd_monitor_item_t::kNoTimeout); + item_maker_t item42_nottimeout(kNoTimeout); // Item which should get 42 bytes, then get notified it is closed. item_maker_t item42_thenclose(16 * usec_per_msec); // Item which gets one poke. - item_maker_t item_pokee(fd_monitor_item_t::kNoTimeout); + item_maker_t item_pokee(kNoTimeout); // Item which should be called back once. item_maker_t item_oneshot(16 * usec_per_msec); @@ -1515,6 +1519,12 @@ static void test_indents() { 0, "\nend" // ); + tests.clear(); + add_test(&tests, // + 0, "echo 'continuation line' \\", // + 1, "\ncont", // + 0, "\n" // + ); int test_idx = 0; for (const indent_test_t &test : tests) { // Construct the input text and expected indents. @@ -1584,62 +1594,6 @@ static void test_parse_util_cmdsubst_extent() { } } -static struct wcsfilecmp_test { - const wchar_t *str1; - const wchar_t *str2; - int expected_rc; -} wcsfilecmp_tests[] = {{L"", L"", 0}, - {L"", L"def", -1}, - {L"abc", L"", 1}, - {L"abc", L"def", -1}, - {L"abc", L"DEF", -1}, - {L"DEF", L"abc", 1}, - {L"abc", L"abc", 0}, - {L"ABC", L"ABC", 0}, - {L"AbC", L"abc", -1}, - {L"AbC", L"ABC", 1}, - {L"def", L"abc", 1}, - {L"1ghi", L"1gHi", 1}, - {L"1ghi", L"2ghi", -1}, - {L"1ghi", L"01ghi", 1}, - {L"1ghi", L"02ghi", -1}, - {L"01ghi", L"1ghi", -1}, - {L"1ghi", L"002ghi", -1}, - {L"002ghi", L"1ghi", 1}, - {L"abc01def", L"abc1def", -1}, - {L"abc1def", L"abc01def", 1}, - {L"abc12", L"abc5", 1}, - {L"51abc", L"050abc", 1}, - {L"abc5", L"abc12", -1}, - {L"5abc", L"12ABC", -1}, - {L"abc0789", L"abc789", -1}, - {L"abc0xA789", L"abc0xA0789", 1}, - {L"abc002", L"abc2", -1}, - {L"abc002g", L"abc002", 1}, - {L"abc002g", L"abc02g", -1}, - {L"abc002.txt", L"abc02.txt", -1}, - {L"abc005", L"abc012", -1}, - {L"abc02", L"abc002", 1}, - {L"abc002.txt", L"abc02.txt", -1}, - {L"GHI1abc2.txt", L"ghi1abc2.txt", -1}, - {L"a0", L"a00", -1}, - {L"a00b", L"a0b", -1}, - {L"a0b", L"a00b", 1}, - {L"a-b", L"azb", 1}, - {nullptr, nullptr, 0}}; - -/// 
Verify the behavior of the `wcsfilecmp()` function. -static void test_wcsfilecmp() { - for (auto test = wcsfilecmp_tests; test->str1; test++) { - int rc = wcsfilecmp(test->str1, test->str2); - if (rc != test->expected_rc) { - err(L"New failed on line %lu: [\"%ls\" <=> \"%ls\"]: " - L"expected return code %d but got %d", - __LINE__, test->str1, test->str2, test->expected_rc, rc); - } - } -} - static void test_const_strlen() { do_test(const_strlen("") == 0); do_test(const_strlen(L"") == 0); @@ -1783,7 +1737,6 @@ void test_dir_iter() { static void test_utility_functions() { say(L"Testing utility functions"); - test_wcsfilecmp(); test_parse_util_cmdsubst_extent(); test_const_strlen(); test_const_strcmp(); @@ -1997,28 +1950,6 @@ static void test_utf8() { #endif } -static void test_feature_flags() { - say(L"Testing future feature flags"); - using ft = features_t; - ft f; - f.set_from_string(L"stderr-nocaret,nonsense"); - do_test(f.test(ft::stderr_nocaret)); - f.set_from_string(L"stderr-nocaret,no-stderr-nocaret,nonsense"); - do_test(f.test(ft::stderr_nocaret)); - - // Ensure every metadata is represented once. - size_t counts[ft::flag_count] = {}; - for (const auto &md : ft::metadata) { - counts[md.flag]++; - } - for (size_t c : counts) { - do_test(c == 1); - } - do_test(ft::metadata[ft::stderr_nocaret].name == wcstring(L"stderr-nocaret")); - do_test(ft::metadata_for(L"stderr-nocaret") == &ft::metadata[ft::stderr_nocaret]); - do_test(ft::metadata_for(L"not-a-flag") == nullptr); -} - static void test_escape_sequences() { say(L"Testing escape_sequences"); layout_cache_t lc; @@ -2179,15 +2110,15 @@ static bool expand_test(const wchar_t *in, expand_flags_t flags, ...) 
{ va_list va; bool res = true; wchar_t *arg; - parse_error_list_t errors; + auto errors = new_parse_error_list(); pwd_environment_t pwd{}; operation_context_t ctx{parser_t::principal_parser().shared(), pwd, no_cancel}; - if (expand_string(in, &output, flags, ctx, &errors) == expand_result_t::error) { - if (errors.empty()) { + if (expand_string(in, &output, flags, ctx, &*errors) == expand_result_t::error) { + if (errors->empty()) { err(L"Bug: Parse error reported but no error text found."); } else { - err(L"%ls", errors.at(0).describe(in, ctx.parser->is_interactive()).c_str()); + err(L"%ls", errors->at(0)->describe(in, ctx.parser->is_interactive())->c_str()); } return false; } @@ -2398,14 +2329,14 @@ static void test_expand_overflow() { int set = parser->vars().set(L"bigvar", ENV_LOCAL, std::move(vals)); do_test(set == ENV_OK); - parse_error_list_t errors; + auto errors = new_parse_error_list(); operation_context_t ctx{parser, parser->vars(), no_cancel}; // We accept only 1024 completions. completion_receiver_t output{1024}; - auto res = expand_string(expansion, &output, expand_flags_t{}, ctx, &errors); - do_test(!errors.empty()); + auto res = expand_string(expansion, &output, expand_flags_t{}, ctx, &*errors); + do_test(!errors->empty()); do_test(res == expand_result_t::error); parser->vars().pop(); @@ -2814,11 +2745,11 @@ static void test_1_word_motion(word_motion_t motion, move_word_style_t style, } stops.erase(idx); - move_word_state_machine_t sm(style); + auto sm = new_move_word_state_machine(style); while (idx != end) { size_t char_idx = (motion == word_motion_left ? 
idx - 1 : idx); wchar_t wc = command.at(char_idx); - bool will_stop = !sm.consume_char(wc); + bool will_stop = !sm->consume_char(wc); // std::fwprintf(stdout, L"idx %lu, looking at %lu (%c): %d\n", idx, char_idx, (char)wc, // will_stop); bool expected_stop = (stops.count(idx) > 0); @@ -2839,7 +2770,7 @@ static void test_1_word_motion(word_motion_t motion, move_word_style_t style, stops.erase(idx); } if (will_stop) { - sm.reset(); + sm->reset(); } else { idx += (motion == word_motion_left ? -1 : 1); } @@ -2849,36 +2780,51 @@ static void test_1_word_motion(word_motion_t motion, move_word_style_t style, /// Test word motion (forward-word, etc.). Carets represent cursor stops. static void test_word_motion() { say(L"Testing word motion"); - test_1_word_motion(word_motion_left, move_word_style_punctuation, L"^echo ^hello_^world.^txt^"); - test_1_word_motion(word_motion_right, move_word_style_punctuation, + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_punctuation, + L"^echo ^hello_^world.^txt^"); + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_punctuation, L"^echo^ hello^_world^.txt^"); - test_1_word_motion(word_motion_left, move_word_style_punctuation, + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_punctuation, L"echo ^foo_^foo_^foo/^/^/^/^/^ ^"); - test_1_word_motion(word_motion_right, move_word_style_punctuation, + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_punctuation, L"^echo^ foo^_foo^_foo^/^/^/^/^/ ^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, L"^/^foo/^bar/^baz/^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, L"^echo ^--foo ^--bar^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_path_components, + L"^/^foo/^bar/^baz/^"); + test_1_word_motion(word_motion_left, 
move_word_style_t::move_word_style_path_components, + L"^echo ^--foo ^--bar^"); + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_path_components, L"^echo ^hi ^> ^/^dev/^null^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_path_components, L"^echo ^/^foo/^bar{^aaa,^bbb,^ccc}^bak/^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, L"^echo ^bak ^///^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, L"^aaa ^@ ^@^aaa^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, L"^aaa ^a ^@^aaa^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, L"^aaa ^@@@ ^@@^aa^"); - test_1_word_motion(word_motion_left, move_word_style_path_components, L"^aa^@@ ^aa@@^a^"); + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_path_components, + L"^echo ^bak ^///^"); + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_path_components, + L"^aaa ^@ ^@^aaa^"); + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_path_components, + L"^aaa ^a ^@^aaa^"); + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_path_components, + L"^aaa ^@@@ ^@@^aa^"); + test_1_word_motion(word_motion_left, move_word_style_t::move_word_style_path_components, + L"^aa^@@ ^aa@@^a^"); - test_1_word_motion(word_motion_right, move_word_style_punctuation, L"^a^ bcd^"); - test_1_word_motion(word_motion_right, move_word_style_punctuation, L"a^b^ cde^"); - test_1_word_motion(word_motion_right, move_word_style_punctuation, L"^ab^ cde^"); - test_1_word_motion(word_motion_right, move_word_style_punctuation, L"^ab^&cd^ ^& ^e^ f^&"); + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_punctuation, + L"^a^ bcd^"); + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_punctuation, + L"a^b^ 
cde^"); + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_punctuation, + L"^ab^ cde^"); + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_punctuation, + L"^ab^&cd^ ^& ^e^ f^&"); - test_1_word_motion(word_motion_right, move_word_style_whitespace, L"^^a-b-c^ d-e-f"); - test_1_word_motion(word_motion_right, move_word_style_whitespace, L"^a-b-c^\n d-e-f^ "); - test_1_word_motion(word_motion_right, move_word_style_whitespace, L"^a-b-c^\n\nd-e-f^ "); + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_whitespace, + L"^^a-b-c^ d-e-f"); + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_whitespace, + L"^a-b-c^\n d-e-f^ "); + test_1_word_motion(word_motion_right, move_word_style_t::move_word_style_whitespace, + L"^a-b-c^\n\nd-e-f^ "); } /// Test is_potential_path. @@ -3139,7 +3085,7 @@ static void test_dup2s() { io_chain_t chain; chain.push_back(make_shared(17)); chain.push_back(make_shared(3, 19)); - auto list = dup2_list_t::resolve_chain(chain); + auto list = dup2_list_resolve_chain_shim(chain); do_test(list.get_actions().size() == 2); auto act1 = list.get_actions().at(0); @@ -3160,7 +3106,7 @@ static void test_dup2s_fd_for_target_fd() { chain.push_back(make_shared(5, 8)); chain.push_back(make_shared(1, 4)); chain.push_back(make_shared(3, 5)); - auto list = dup2_list_t::resolve_chain(chain); + auto list = dup2_list_resolve_chain_shim(chain); do_test(list.fd_for_target_fd(3) == 8); do_test(list.fd_for_target_fd(5) == 8); @@ -3295,15 +3241,15 @@ static void test_wildcards() { unescape_string_in_place(&wc, UNESCAPE_SPECIAL); do_test(!wildcard_has(wc) && wildcard_has_internal(wc)); - auto &feat = mutable_fish_features(); - auto saved = feat.test(features_t::flag_t::qmark_noglob); - feat.set(features_t::flag_t::qmark_noglob, false); + auto feat = mutable_fish_features(); + auto saved = feat->test(feature_flag_t::qmark_noglob); + feat->set(feature_flag_t::qmark_noglob, false); 
do_test(wildcard_has(L"?")); do_test(!wildcard_has(L"\\?")); - feat.set(features_t::flag_t::qmark_noglob, true); + feat->set(feature_flag_t::qmark_noglob, true); do_test(!wildcard_has(L"?")); do_test(!wildcard_has(L"\\?")); - feat.set(features_t::flag_t::qmark_noglob, saved); + feat->set(feature_flag_t::qmark_noglob, saved); } static void test_complete() { @@ -4289,7 +4235,7 @@ bool poll_notifier(const std::unique_ptr ¬e) { bool result = false; int fd = note->notification_fd(); - if (fd >= 0 && fd_readable_set_t::poll_fd_readable(fd)) { + if (fd >= 0 && poll_fd_readable(fd)) { result = note->notification_fd_became_readable(fd); } return result; @@ -5039,7 +4985,7 @@ static void test_new_parser_fuzzing() { wcstring src; src.reserve(128); - parse_error_list_t errors; + auto errors = new_parse_error_list(); double start = timef(); bool log_it = true; @@ -5063,7 +5009,7 @@ static void test_new_parser_fuzzing() { // Parse a statement, returning the command, args (joined by spaces), and the decoration. Returns // true if successful. 
static bool test_1_parse_ll2(const wcstring &src, wcstring *out_cmd, wcstring *out_joined_args, - enum statement_decoration_t *out_deco) { + statement_decoration_t *out_deco) { using namespace ast; out_cmd->clear(); out_joined_args->clear(); @@ -5136,7 +5082,7 @@ static void test_new_parser_ll2() { wcstring src; wcstring cmd; wcstring args; - enum statement_decoration_t deco; + statement_decoration_t deco; } tests[] = {{L"echo hello", L"echo", L"hello", statement_decoration_t::none}, {L"command echo hello", L"echo", L"hello", statement_decoration_t::command}, {L"exec echo hello", L"echo", L"hello", statement_decoration_t::exec}, @@ -5153,7 +5099,7 @@ static void test_new_parser_ll2() { for (const auto &test : tests) { wcstring cmd, args; - enum statement_decoration_t deco = statement_decoration_t::none; + statement_decoration_t deco = statement_decoration_t::none; bool success = test_1_parse_ll2(test.src, &cmd, &args, &deco); if (!success) err(L"Parse of '%ls' failed on line %ld", test.cmd.c_str(), (long)__LINE__); if (cmd != test.cmd) @@ -5209,17 +5155,20 @@ static void test_new_parser_ad_hoc() { ast = ast_t::parse(L"a=", parse_flag_leave_unterminated); do_test(!ast.errored()); - parse_error_list_t errors; - ast = ast_t::parse(L"begin; echo (", parse_flag_leave_unterminated, &errors); - do_test(errors.size() == 1 && errors.at(0).code == parse_error_tokenizer_unterminated_subshell); + auto errors = new_parse_error_list(); + ast = ast_t::parse(L"begin; echo (", parse_flag_leave_unterminated, &*errors); + do_test(errors->size() == 1 && + errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_subshell); - errors.clear(); - ast = ast_t::parse(L"for x in (", parse_flag_leave_unterminated, &errors); - do_test(errors.size() == 1 && errors.at(0).code == parse_error_tokenizer_unterminated_subshell); + errors->clear(); + ast = ast_t::parse(L"for x in (", parse_flag_leave_unterminated, &*errors); + do_test(errors->size() == 1 && + errors->at(0)->code() == 
parse_error_code_t::tokenizer_unterminated_subshell); - errors.clear(); - ast = ast_t::parse(L"begin; echo '", parse_flag_leave_unterminated, &errors); - do_test(errors.size() == 1 && errors.at(0).code == parse_error_tokenizer_unterminated_quote); + errors->clear(); + ast = ast_t::parse(L"begin; echo '", parse_flag_leave_unterminated, &*errors); + do_test(errors->size() == 1 && + errors->at(0)->code() == parse_error_code_t::tokenizer_unterminated_quote); } static void test_new_parser_errors() { @@ -5228,46 +5177,46 @@ static void test_new_parser_errors() { const wchar_t *src; parse_error_code_t code; } tests[] = { - {L"echo 'abc", parse_error_tokenizer_unterminated_quote}, - {L"'", parse_error_tokenizer_unterminated_quote}, - {L"echo (abc", parse_error_tokenizer_unterminated_subshell}, + {L"echo 'abc", parse_error_code_t::tokenizer_unterminated_quote}, + {L"'", parse_error_code_t::tokenizer_unterminated_quote}, + {L"echo (abc", parse_error_code_t::tokenizer_unterminated_subshell}, - {L"end", parse_error_unbalancing_end}, - {L"echo hi ; end", parse_error_unbalancing_end}, + {L"end", parse_error_code_t::unbalancing_end}, + {L"echo hi ; end", parse_error_code_t::unbalancing_end}, - {L"else", parse_error_unbalancing_else}, - {L"if true ; end ; else", parse_error_unbalancing_else}, + {L"else", parse_error_code_t::unbalancing_else}, + {L"if true ; end ; else", parse_error_code_t::unbalancing_else}, - {L"case", parse_error_unbalancing_case}, - {L"if true ; case ; end", parse_error_generic}, + {L"case", parse_error_code_t::unbalancing_case}, + {L"if true ; case ; end", parse_error_code_t::generic}, - {L"true | and", parse_error_andor_in_pipeline}, + {L"true | and", parse_error_code_t::andor_in_pipeline}, - {L"a=", parse_error_bare_variable_assignment}, + {L"a=", parse_error_code_t::bare_variable_assignment}, }; for (const auto &test : tests) { const wcstring src = test.src; parse_error_code_t expected_code = test.code; - parse_error_list_t errors; - auto ast = 
ast::ast_t::parse(src, parse_flag_none, &errors); + auto errors = new_parse_error_list(); + auto ast = ast::ast_t::parse(src, parse_flag_none, &*errors); if (!ast.errored()) { err(L"Source '%ls' was expected to fail to parse, but succeeded", src.c_str()); } - if (errors.size() != 1) { + if (errors->size() != 1) { err(L"Source '%ls' was expected to produce 1 error, but instead produced %lu errors", - src.c_str(), errors.size()); - for (const auto &err : errors) { - fprintf(stderr, "%ls\n", err.describe(src, false).c_str()); + src.c_str(), errors->size()); + for (size_t i = 0; i < errors->size(); i++) { + fprintf(stderr, "%ls\n", errors->at(i)->describe(src, false)->c_str()); } - } else if (errors.at(0).code != expected_code) { + } else if (errors->at(0)->code() != expected_code) { err(L"Source '%ls' was expected to produce error code %lu, but instead produced error " L"code %lu", - src.c_str(), expected_code, (unsigned long)errors.at(0).code); - for (const auto &error : errors) { - err(L"\t\t%ls", error.describe(src, true).c_str()); + src.c_str(), expected_code, (unsigned long)errors->at(0)->code()); + for (size_t i = 0; i < errors->size(); i++) { + err(L"\t\t%ls", errors->at(i)->describe(src, true)->c_str()); } } } @@ -5360,13 +5309,14 @@ static void test_error_messages() { {L"echo \"foo\"$\"bar\"", ERROR_NO_VAR_NAME}, {L"echo foo $ bar", ERROR_NO_VAR_NAME}}; - parse_error_list_t errors; + auto errors = new_parse_error_list(); for (const auto &test : error_tests) { - errors.clear(); - parse_util_detect_errors(test.src, &errors); - do_test(!errors.empty()); - if (!errors.empty()) { - do_test1(string_matches_format(errors.at(0).text, test.error_text_format), test.src); + errors->clear(); + parse_util_detect_errors(test.src, &*errors); + do_test(!errors->empty()); + if (!errors->empty()) { + do_test1(string_matches_format(*errors->at(0)->text(), test.error_text_format), + test.src); } } } @@ -5764,8 +5714,16 @@ static void test_highlighting() { {L"\\U110000", 
highlight_role_t::error}, }); #endif - const auto saved_flags = fish_features(); - mutable_fish_features().set(features_t::ampersand_nobg_in_token, true); + + highlight_tests.clear(); + highlight_tests.push_back({ + {L"echo", highlight_role_t::command}, + {L"stuff", highlight_role_t::param}, + {L"# comment", highlight_role_t::comment}, + }); + + bool saved_flag = feature_test(feature_flag_t::ampersand_nobg_in_token); + mutable_fish_features()->set(feature_flag_t::ampersand_nobg_in_token, true); for (const highlight_component_list_t &components : highlight_tests) { // Generate the text. wcstring text; @@ -5810,7 +5768,7 @@ static void test_highlighting() { } } } - mutable_fish_features() = saved_flags; + mutable_fish_features()->set(feature_flag_t::ampersand_nobg_in_token, saved_flag); vars.remove(L"VARIABLE_IN_COMMAND", ENV_DEFAULT); vars.remove(L"VARIABLE_IN_COMMAND2", ENV_DEFAULT); } @@ -6262,7 +6220,7 @@ static void test_string() { run_one_string_test(t.argv, t.expected_rc, t.expected_out); } - const auto saved_flags = fish_features(); + bool saved_flag = feature_test(feature_flag_t::qmark_noglob); const struct string_test qmark_noglob_tests[] = { {{L"string", L"match", L"a*b?c", L"axxb?c", nullptr}, STATUS_CMD_OK, L"axxb?c\n"}, {{L"string", L"match", L"*?", L"a", nullptr}, STATUS_CMD_ERROR, L""}, @@ -6270,7 +6228,7 @@ static void test_string() { {{L"string", L"match", L"?*", L"a", nullptr}, STATUS_CMD_ERROR, L""}, {{L"string", L"match", L"?*", L"ab", nullptr}, STATUS_CMD_ERROR, L""}, {{L"string", L"match", L"a*\\?", L"abc?", nullptr}, STATUS_CMD_ERROR, L""}}; - mutable_fish_features().set(features_t::qmark_noglob, true); + mutable_fish_features()->set(feature_flag_t::qmark_noglob, true); for (const auto &t : qmark_noglob_tests) { run_one_string_test(t.argv, t.expected_rc, t.expected_out); } @@ -6282,11 +6240,11 @@ static void test_string() { {{L"string", L"match", L"?*", L"a", nullptr}, STATUS_CMD_OK, L"a\n"}, {{L"string", L"match", L"?*", L"ab", nullptr}, 
STATUS_CMD_OK, L"ab\n"}, {{L"string", L"match", L"a*\\?", L"abc?", nullptr}, STATUS_CMD_OK, L"abc?\n"}}; - mutable_fish_features().set(features_t::qmark_noglob, false); + mutable_fish_features()->set(feature_flag_t::qmark_noglob, false); for (const auto &t : qmark_glob_tests) { run_one_string_test(t.argv, t.expected_rc, t.expected_out); } - mutable_fish_features() = saved_flags; + mutable_fish_features()->set(feature_flag_t::qmark_noglob, saved_flag); } /// Helper for test_timezone_env_vars(). @@ -6682,7 +6640,8 @@ void test_dirname_basename() { static void test_topic_monitor() { say(L"Testing topic monitor"); - topic_monitor_t monitor; + auto monitor_box = new_topic_monitor(); + topic_monitor_t &monitor = *monitor_box; generation_list_t gens{}; constexpr auto t = topic_t::sigchld; gens.sigchld = 0; @@ -6706,12 +6665,13 @@ static void test_topic_monitor() { static void test_topic_monitor_torture() { say(L"Torture-testing topic monitor"); - topic_monitor_t monitor; + auto monitor_box = new_topic_monitor(); + topic_monitor_t &monitor = *monitor_box; const size_t thread_count = 64; constexpr auto t1 = topic_t::sigchld; constexpr auto t2 = topic_t::sighupint; std::vector gens; - gens.resize(thread_count, generation_list_t::invalids()); + gens.resize(thread_count, invalid_generations()); std::atomic post_count{}; for (auto &gen : gens) { gen = monitor.current_generations(); @@ -6791,41 +6751,6 @@ static void test_fd_event_signaller() { do_test(!sema.try_consume()); } -static void test_timer_format() { - say(L"Testing timer format"); - // This test uses numeric output, so we need to set the locale. 
- char *saved_locale = strdup(std::setlocale(LC_NUMERIC, nullptr)); - std::setlocale(LC_NUMERIC, "C"); - auto t1 = timer_snapshot_t::take(); - t1.cpu_fish.ru_utime.tv_usec = 0; - t1.cpu_fish.ru_stime.tv_usec = 0; - t1.cpu_children.ru_utime.tv_usec = 0; - t1.cpu_children.ru_stime.tv_usec = 0; - auto t2 = t1; - t2.cpu_fish.ru_utime.tv_usec = 999995; - t2.cpu_fish.ru_stime.tv_usec = 999994; - t2.cpu_children.ru_utime.tv_usec = 1000; - t2.cpu_children.ru_stime.tv_usec = 500; - t2.wall += std::chrono::microseconds(500); - auto expected = - LR"( -________________________________________________________ -Executed in 500.00 micros fish external - usr time 1.00 secs 1.00 secs 1.00 millis - sys time 1.00 secs 1.00 secs 0.50 millis -)"; // (a) (b) (c) - // (a) remaining columns should align even if there are different units - // (b) carry to the next unit when it would overflow %6.2F - // (c) carry to the next unit when the larger one exceeds 1000 - std::wstring actual = timer_snapshot_t::print_delta(t1, t2, true); - if (actual != expected) { - err(L"Failed to format timer snapshot\nExpected: %ls\nActual:%ls\n", expected, - actual.c_str()); - } - std::setlocale(LC_NUMERIC, saved_locale); - free(saved_locale); -} - static void test_killring() { say(L"Testing killring"); @@ -7119,7 +7044,6 @@ void test_wgetopt() { } case '?': { // unrecognized option - fprintf(stderr, "got arg %d\n", w.woptind - 1); if (argv[w.woptind - 1]) { do_test(argv[w.woptind - 1] != nullptr); arguments.push_back(argv[w.woptind - 1]); @@ -7137,6 +7061,13 @@ void test_wgetopt() { do_test(join_strings(arguments, L' ') == L"emacsnw emacs -nw"); } +void test_rust_smoke() { + size_t x = rust::add(37, 5); + do_test(x == 42); +} + +void test_rust_ffi() { rust::run_ffi_tests(); } + // typedef void (test_entry_point_t)(); using test_entry_point_t = void (*)(); struct test_t { @@ -7191,7 +7122,6 @@ static const test_t s_tests[]{ {TEST_GROUP("cancellation"), test_cancellation}, {TEST_GROUP("indents"), test_indents}, 
{TEST_GROUP("utf8"), test_utf8}, - {TEST_GROUP("feature_flags"), test_feature_flags}, {TEST_GROUP("escape_sequences"), test_escape_sequences}, {TEST_GROUP("pcre2_escape"), test_pcre2_escape}, {TEST_GROUP("lru"), test_lru}, @@ -7247,7 +7177,6 @@ static const test_t s_tests[]{ {TEST_GROUP("topics"), test_topic_monitor_torture}, {TEST_GROUP("pipes"), test_pipes}, {TEST_GROUP("fd_event"), test_fd_event_signaller}, - {TEST_GROUP("timer_format"), test_timer_format}, {TEST_GROUP("termsize"), termsize_tester_t::test}, {TEST_GROUP("killring"), test_killring}, {TEST_GROUP("re"), test_re_errs}, @@ -7256,8 +7185,9 @@ static const test_t s_tests[]{ {TEST_GROUP("re"), test_re_named}, {TEST_GROUP("re"), test_re_name_extraction}, {TEST_GROUP("re"), test_re_substitute}, - {TEST_GROUP("re"), test_re_substitute}, {TEST_GROUP("wgetopt"), test_wgetopt}, + {TEST_GROUP("rust_smoke"), test_rust_smoke}, + {TEST_GROUP("rust_ffi"), test_rust_ffi}, }; void list_tests() { @@ -7312,6 +7242,7 @@ int main(int argc, char **argv) { say(L"Testing low-level functionality"); set_main_thread(); setup_fork_guards(); + rust_init(); proc_init(); env_init(); misc_init(); diff --git a/src/flog.cpp b/src/flog.cpp index f5e3b887d..b6f0ee61d 100644 --- a/src/flog.cpp +++ b/src/flog.cpp @@ -180,6 +180,8 @@ void set_flog_output_file(FILE *f) { void log_extra_to_flog_file(const wcstring &s) { g_logger.acquire()->log_extra(s.c_str()); } +int get_flog_file_fd() { return s_flog_file_fd; } + std::vector get_flog_categories() { std::vector result(s_all_categories.begin(), s_all_categories.end()); std::sort(result.begin(), result.end(), [](const category_t *a, const category_t *b) { diff --git a/src/flog.h b/src/flog.h index 4a3627f3f..085be6d78 100644 --- a/src/flog.h +++ b/src/flog.h @@ -197,6 +197,10 @@ std::vector get_flog_categories(); /// This is used by the tracing machinery. void log_extra_to_flog_file(const wcstring &s); +/// \return the FD for the flog file. +/// This is exposed for the Rust bridge. 
+int get_flog_file_fd(); + /// Output to the fish log a sequence of arguments, separated by spaces, and ending with a newline. /// We save and restore errno because we don't want this to affect other code. #define FLOG(wht, ...) \ diff --git a/src/function.cpp b/src/function.cpp index 48bab6b4e..eee67870d 100644 --- a/src/function.cpp +++ b/src/function.cpp @@ -28,7 +28,7 @@ #include "parse_constants.h" #include "parser.h" #include "parser_keywords.h" -#include "signal.h" +#include "signals.h" #include "wcstringutil.h" #include "wutil.h" // IWYU pragma: keep diff --git a/src/future_feature_flags.cpp b/src/future_feature_flags.cpp deleted file mode 100644 index ac92b705a..000000000 --- a/src/future_feature_flags.cpp +++ /dev/null @@ -1,76 +0,0 @@ -#include "config.h" // IWYU pragma: keep - -#include "future_feature_flags.h" - -#include -#include - -#include "wcstringutil.h" - -features_t::features_t() { - for (const metadata_t &md : metadata) { - this->set(md.flag, md.default_value); - } -} - -/// The set of features applying to this instance. -features_t features_t::global_features; - -const features_t::metadata_t features_t::metadata[features_t::flag_count] = { - {stderr_nocaret, L"stderr-nocaret", L"3.0", - L"^ no longer redirects stderr (historical, can no longer be changed)", true, - true /* read-only */}, - {qmark_noglob, L"qmark-noglob", L"3.0", L"? 
no longer globs", false, false}, - {string_replace_backslash, L"regex-easyesc", L"3.1", L"string replace -r needs fewer \\'s", - true, false}, - {ampersand_nobg_in_token, L"ampersand-nobg-in-token", L"3.4", - L"& only backgrounds if followed by a separator", true, false}, -}; - -const struct features_t::metadata_t *features_t::metadata_for(const wchar_t *name) { - assert(name && "null flag name"); - for (const auto &md : metadata) { - if (!std::wcscmp(name, md.name)) return &md; - } - return nullptr; -} - -void features_t::set_from_string(const wcstring &str) { - wcstring_list_t entries = split_string(str, L','); - const wchar_t *whitespace = L"\t\n\v\f\r "; - for (wcstring entry : entries) { - if (entry.empty()) continue; - - // Trim leading and trailing whitespace - entry.erase(0, entry.find_first_not_of(whitespace)); - entry.erase(entry.find_last_not_of(whitespace) + 1); - - const wchar_t *name = entry.c_str(); - bool value = true; - // A "no-" prefix inverts the sense. - if (string_prefixes_string(L"no-", name)) { - value = false; - name += const_strlen("no-"); - } - // Look for a feature with this name. If we don't find it, assume it's a group name and set - // all features whose group contain it. Do nothing even if the string is unrecognized; this - // is to allow uniform invocations of fish (e.g. disable a feature that is only present in - // future versions). - // The special name 'all' may be used for those who like to live on the edge. - if (const metadata_t *md = metadata_for(name)) { - // Only change it if it's not read-only. - // Don't complain if it is, this is typically set from a variable. 
- if (!md->read_only) { - this->set(md->flag, value); - } - } else { - for (const metadata_t &md : metadata) { - if (std::wcsstr(md.groups, name) || !std::wcscmp(name, L"all")) { - if (!md.read_only) { - this->set(md.flag, value); - } - } - } - } - } -} diff --git a/src/future_feature_flags.h b/src/future_feature_flags.h index 9e604b724..29f91f0c2 100644 --- a/src/future_feature_flags.h +++ b/src/future_feature_flags.h @@ -1,105 +1,8 @@ -// Flags to enable upcoming features #ifndef FISH_FUTURE_FEATURE_FLAGS_H #define FISH_FUTURE_FEATURE_FLAGS_H -#include - -#include "common.h" - -class features_t { - public: - /// The list of flags. - enum flag_t { - /// Whether ^ is supported for stderr redirection. - stderr_nocaret, - - /// Whether ? is supported as a glob. - qmark_noglob, - - /// Whether string replace -r double-unescapes the replacement. - string_replace_backslash, - - /// Whether "&" is not-special if followed by a word character. - ampersand_nobg_in_token, - - /// The number of flags. - flag_count - }; - - /// Return whether a flag is set. - bool test(flag_t f) const { - assert(f >= 0 && f < flag_count && "Invalid flag"); - return values[f].load(std::memory_order_relaxed); - } - - /// Set a flag. - void set(flag_t f, bool value) { - assert(f >= 0 && f < flag_count && "Invalid flag"); - values[f].store(value, std::memory_order_relaxed); - } - - /// Parses a comma-separated feature-flag string, updating ourselves with the values. - /// Feature names or group names may be prefixed with "no-" to disable them. - /// The special group name "all" may be used for those who like to live on the edge. - /// Unknown features are silently ignored. - void set_from_string(const wcstring &str); - - /// Metadata about feature flags. - struct metadata_t { - /// The flag itself. - features_t::flag_t flag; - - /// User-presentable short name of the feature flag. - const wchar_t *name; - - /// Comma-separated list of feature groups. 
- const wchar_t *groups; - - /// User-presentable description of the feature flag. - const wchar_t *description; - - /// Default flag value. - const bool default_value; - - /// Whether the value can still be changed or not. - const bool read_only; - }; - - /// The metadata, indexed by flag. - static const metadata_t metadata[flag_count]; - - /// Return the metadata for a particular name, or nullptr if not found. - static const struct metadata_t *metadata_for(const wchar_t *name); - - /// The singleton shared feature set. - static features_t global_features; - - features_t(); - - features_t(const features_t &rhs) { *this = rhs; } - - void operator=(const features_t &rhs) { - for (int i = 0; i < flag_count; i++) { - flag_t f = static_cast(i); - this->set(f, rhs.test(f)); - } - } - - private: - // Values for the flags. - // These are atomic to "fix" a race reported by tsan where tests of feature flags and other - // tests which use them conceptually race. - std::atomic values[flag_count]{}; -}; - -/// Return the global set of features for fish. This is const to prevent accidental mutation. -inline const features_t &fish_features() { return features_t::global_features; } - -/// Perform a feature test on the global set of features. -inline bool feature_test(features_t::flag_t f) { return fish_features().test(f); } - -/// Return the global set of features for fish, but mutable. In general fish features should be set -/// at startup only. 
-inline features_t &mutable_fish_features() { return features_t::global_features; } +#include "future_feature_flags.rs.h" +using feature_flag_t = FeatureFlag; +using features_t = Features; #endif diff --git a/src/highlight.cpp b/src/highlight.cpp index ce0f29977..bfa053d62 100644 --- a/src/highlight.cpp +++ b/src/highlight.cpp @@ -394,7 +394,7 @@ rgb_color_t highlight_color_resolver_t::resolve_spec(const highlight_spec_t &hig return iter->second; } -static bool command_is_valid(const wcstring &cmd, enum statement_decoration_t decoration, +static bool command_is_valid(const wcstring &cmd, statement_decoration_t decoration, const wcstring &working_directory, const environment_t &vars); static bool has_expand_reserved(const wcstring &str) { @@ -665,7 +665,7 @@ static void color_string_internal(const wcstring &buffstr, highlight_spec_t base break; } case L'?': { - if (!feature_test(features_t::qmark_noglob)) { + if (!feature_test(feature_flag_t::qmark_noglob)) { colors[in_pos] = highlight_role_t::operat; } break; @@ -1057,7 +1057,7 @@ void highlighter_t::visit(const ast::variable_assignment_t &varas) { color_as_argument(varas); // Highlight the '=' in variable assignments as an operator. auto where = variable_assignment_equals_pos(varas.source(this->buff)); - if (where.has_value()) { + if (where) { size_t equals_loc = varas.source_range().start + *where; this->color_array.at(equals_loc) = highlight_role_t::operat; auto var_name = varas.source(this->buff).substr(0, *where); @@ -1079,7 +1079,7 @@ void highlighter_t::visit(const ast::decorated_statement_t &stmt) { if (!this->io_still_ok()) { // We cannot check if the command is invalid, so just assume it's valid. is_valid_cmd = true; - } else if (variable_assignment_equals_pos(*cmd).has_value()) { + } else if (variable_assignment_equals_pos(*cmd)) { is_valid_cmd = true; } else { // Check to see if the command is valid. 
@@ -1158,12 +1158,10 @@ static bool contains_pending_variable(const std::vector &pending_varia } void highlighter_t::visit(const ast::redirection_t &redir) { - maybe_t oper = - pipe_or_redir_t::from_string(redir.oper.source(this->buff)); // like 2> - wcstring target = redir.target.source(this->buff); // like &1 or file path + auto oper = pipe_or_redir_from_string(redir.oper.source(this->buff).c_str()); // like 2> + wcstring target = redir.target.source(this->buff); // like &1 or file path - assert(oper.has_value() && - "Should have successfully parsed a pipe_or_redir_t since it was in our ast"); + assert(oper && "Should have successfully parsed a pipe_or_redir_t since it was in our ast"); // Color the > part. // It may have parsed successfully yet still be invalid (e.g. 9999999999999>&1) @@ -1305,7 +1303,7 @@ highlighter_t::color_array_t highlighter_t::highlight() { } // namespace /// Determine if a command is valid. -static bool command_is_valid(const wcstring &cmd, enum statement_decoration_t decoration, +static bool command_is_valid(const wcstring &cmd, statement_decoration_t decoration, const wcstring &working_directory, const environment_t &vars) { // Determine which types we check, based on the decoration. bool builtin_ok = true, function_ok = true, abbreviation_ok = true, command_ok = true, diff --git a/src/history.cpp b/src/history.cpp index 6ce463b0e..567316ec7 100644 --- a/src/history.cpp +++ b/src/history.cpp @@ -586,8 +586,8 @@ void history_impl_t::populate_from_file_contents() { if (file_contents) { size_t cursor = 0; maybe_t offset; - while ((offset = - file_contents->offset_of_next_item(&cursor, boundary_timestamp)).has_value()) { + while ((offset = file_contents->offset_of_next_item(&cursor, boundary_timestamp)) + .has_value()) { // Remember this item. 
old_item_offsets.push_back(*offset); } @@ -1205,9 +1205,9 @@ static bool should_import_bash_history_line(const wcstring &line) { if (ast::ast_t::parse(line).errored()) return false; // In doing this test do not allow incomplete strings. Hence the "false" argument. - parse_error_list_t errors; - parse_util_detect_errors(line, &errors); - return errors.empty(); + auto errors = new_parse_error_list(); + parse_util_detect_errors(line, &*errors); + return errors->empty(); } /// Import a bash command history file. Bash's history format is very simple: just lines with #s for diff --git a/src/input.cpp b/src/input.cpp index 562cf7f33..515106b9f 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -28,8 +28,8 @@ #include "parser.h" #include "proc.h" #include "reader.h" -#include "signal.h" // IWYU pragma: keep -#include "wutil.h" // IWYU pragma: keep +#include "signals.h" // IWYU pragma: keep +#include "wutil.h" // IWYU pragma: keep /// A name for our own key mapping for nul. static const wchar_t *k_nul_mapping_name = L"nul"; diff --git a/src/input_common.cpp b/src/input_common.cpp index d2e6e78c0..bd5eba595 100644 --- a/src/input_common.cpp +++ b/src/input_common.cpp @@ -2,7 +2,7 @@ #include "config.h" #include -#include // IWYU pragma: keep +#include // IWYU pragma: keep #include #include #include @@ -22,6 +22,7 @@ #include "env.h" #include "env_universal_common.h" #include "fallback.h" // IWYU pragma: keep +#include "fd_readable_set.rs.h" #include "fds.h" #include "flog.h" #include "input_common.h" @@ -58,7 +59,8 @@ using readb_result_t = int; static readb_result_t readb(int in_fd) { assert(in_fd >= 0 && "Invalid in fd"); universal_notifier_t& notifier = universal_notifier_t::default_notifier(); - fd_readable_set_t fdset; + auto fdset_box = new_fd_readable_set(); + fd_readable_set_t& fdset = *fdset_box; for (;;) { fdset.clear(); fdset.add(in_fd); @@ -73,7 +75,7 @@ static readb_result_t readb(int in_fd) { // Get its suggested delay (possibly none). 
// Note a 0 here means do not poll. - uint64_t timeout = fd_readable_set_t::kNoTimeout; + uint64_t timeout = kNoTimeout; if (uint64_t usecs_delay = notifier.usec_delay_between_polls()) { timeout = usecs_delay; } diff --git a/src/io.cpp b/src/io.cpp index 5ba1b3c3b..f8cb64b17 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -214,37 +214,37 @@ bool io_chain_t::append(const io_chain_t &chain) { bool io_chain_t::append_from_specs(const redirection_spec_list_t &specs, const wcstring &pwd) { bool have_error = false; - for (const auto &spec : specs) { - switch (spec.mode) { + for (size_t i = 0; i < specs.size(); i++) { + const redirection_spec_t *spec = specs.at(i); + switch (spec->mode()) { case redirection_mode_t::fd: { - if (spec.is_close()) { - this->push_back(make_unique(spec.fd)); + if (spec->is_close()) { + this->push_back(make_unique(spec->fd())); } else { - auto target_fd = spec.get_target_as_fd(); - assert(target_fd.has_value() && - "fd redirection should have been validated already"); - this->push_back(make_unique(spec.fd, *target_fd)); + auto target_fd = spec->get_target_as_fd(); + assert(target_fd && "fd redirection should have been validated already"); + this->push_back(make_unique(spec->fd(), *target_fd)); } break; } default: { // We have a path-based redireciton. Resolve it to a file. // Mark it as CLO_EXEC because we don't want it to be open in any child. 
- wcstring path = path_apply_working_directory(spec.target, pwd); - int oflags = spec.oflags(); + wcstring path = path_apply_working_directory(*spec->target(), pwd); + int oflags = spec->oflags(); autoclose_fd_t file{wopen_cloexec(path, oflags, OPEN_MASK)}; if (!file.valid()) { if ((oflags & O_EXCL) && (errno == EEXIST)) { - FLOGF(warning, NOCLOB_ERROR, spec.target.c_str()); + FLOGF(warning, NOCLOB_ERROR, spec->target()->c_str()); } else { if (should_flog(warning)) { - FLOGF(warning, FILE_ERROR, spec.target.c_str()); + FLOGF(warning, FILE_ERROR, spec->target()->c_str()); auto err = errno; // If the error is that the file doesn't exist // or there's a non-directory component, // find the first problematic component for a better message. if (err == ENOENT || err == ENOTDIR) { - auto dname = spec.target; + auto dname = *spec->target(); struct stat buf; while (!dname.empty()) { @@ -269,11 +269,11 @@ bool io_chain_t::append_from_specs(const redirection_spec_list_t &specs, const w // If opening a file fails, insert a closed FD instead of the file redirection // and return false. This lets execution potentially recover and at least gives // the shell a chance to gracefully regain control of the shell (see #7038). 
- this->push_back(make_unique(spec.fd)); + this->push_back(make_unique(spec->fd())); have_error = true; break; } - this->push_back(std::make_shared(spec.fd, std::move(file))); + this->push_back(std::make_shared(spec->fd(), std::move(file))); break; } } @@ -309,6 +309,15 @@ shared_ptr io_chain_t::io_for_fd(int fd) const { return nullptr; } +dup2_list_t dup2_list_resolve_chain_shim(const io_chain_t &io_chain) { + ASSERT_IS_NOT_FORKED_CHILD(); + std::vector chain; + for (const auto &io_data : io_chain) { + chain.push_back(dup2_action_t{io_data->source_fd, io_data->fd}); + } + return dup2_list_resolve_chain(chain); +} + bool output_stream_t::append_narrow_buffer(const separated_buffer_t &buffer) { for (const auto &rhs_elem : buffer.elements()) { if (!append_with_separation(str2wcstring(rhs_elem.contents), rhs_elem.separation, false)) { @@ -334,6 +343,10 @@ const wcstring &output_stream_t::contents() const { return g_empty_string; } int output_stream_t::flush_and_check_error() { return STATUS_CMD_OK; } +fd_output_stream_t::fd_output_stream_t(int fd) : fd_(fd), sigcheck_(topic_t::sighupint) { + assert(fd_ >= 0 && "Invalid fd"); +} + bool fd_output_stream_t::append(const wchar_t *s, size_t amt) { if (errored_) return false; int res = wwrite_to_fd(s, amt, this->fd_); diff --git a/src/io.h b/src/io.h index bd23eddf4..6908e598f 100644 --- a/src/io.h +++ b/src/io.h @@ -16,7 +16,7 @@ #include "fds.h" #include "global_safety.h" #include "redirection.h" -#include "signal.h" +#include "signals.h" #include "topic_monitor.h" using std::shared_ptr; @@ -346,6 +346,8 @@ class io_chain_t : public std::vector { void print() const; }; +dup2_list_t dup2_list_resolve_chain_shim(const io_chain_t &io_chain); + /// Base class representing the output that a builtin can generate. /// This has various subclasses depending on the ultimate output destination. 
class output_stream_t : noncopyable_t, nonmovable_t { @@ -413,9 +415,7 @@ class null_output_stream_t final : public output_stream_t { class fd_output_stream_t final : public output_stream_t { public: /// Construct from a file descriptor, which must be nonegative. - explicit fd_output_stream_t(int fd) : fd_(fd), sigcheck_(topic_t::sighupint) { - assert(fd_ >= 0 && "Invalid fd"); - } + explicit fd_output_stream_t(int fd); int flush_and_check_error() override; @@ -496,6 +496,11 @@ struct io_streams_t : noncopyable_t { std::shared_ptr job_group{}; io_streams_t(output_stream_t &out, output_stream_t &err) : out(out), err(err) {} + + /// autocxx junk. + output_stream_t &get_out() { return out; }; + output_stream_t &get_err() { return err; }; + io_streams_t(const io_streams_t &) = delete; }; #endif diff --git a/src/iothread.cpp b/src/iothread.cpp index c9bbe20ee..62fd6c6e4 100644 --- a/src/iothread.cpp +++ b/src/iothread.cpp @@ -16,6 +16,7 @@ #include "common.h" #include "fallback.h" +#include "fd_readable_set.rs.h" #include "fds.h" #include "flog.h" #include "maybe.h" @@ -213,7 +214,7 @@ void iothread_perform_impl(void_function_t &&func, bool cant_wait) { int iothread_port() { return get_notify_signaller().read_fd(); } void iothread_service_main_with_timeout(uint64_t timeout_usec) { - if (fd_readable_set_t::is_fd_readable(iothread_port(), timeout_usec)) { + if (is_fd_readable(iothread_port(), timeout_usec)) { iothread_service_main(); } } diff --git a/src/parse_constants.h b/src/parse_constants.h index f228e90b3..a7c3e75e6 100644 --- a/src/parse_constants.h +++ b/src/parse_constants.h @@ -2,10 +2,7 @@ #ifndef FISH_PARSE_CONSTANTS_H #define FISH_PARSE_CONSTANTS_H -#include "config.h" - #include "common.h" -#include "enum_map.h" using source_offset_t = uint32_t; constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast(-1); @@ -16,33 +13,31 @@ constexpr source_offset_t SOURCE_OFFSET_INVALID = static_cast(- exit_without_destructors(-1); \ } while (0) -// A range of source 
code. +#if INCLUDE_RUST_HEADERS + +#include "parse_constants.rs.h" + +using source_range_t = SourceRange; +using parse_token_type_t = ParseTokenType; +using parse_keyword_t = ParseKeyword; +using statement_decoration_t = StatementDecoration; +using parse_error_code_t = ParseErrorCode; +using pipeline_position_t = PipelinePosition; +using parse_error_list_t = ParseErrorList; + +#else + +// Hacks to allow us to compile without Rust headers. + +#include "config.h" + struct source_range_t { source_offset_t start; source_offset_t length; - - source_offset_t end() const { - assert(start + length >= start && "Overflow"); - return start + length; - } - - bool operator==(const source_range_t &rhs) const { - return start == rhs.start && length == rhs.length; - } - - bool operator!=(const source_range_t &rhs) const { return !(*this == rhs); } - - // \return true if a location is in this range, including one-past-the-end. - bool contains_inclusive(source_offset_t loc) const { - return start <= loc && loc - start <= length; - } }; -// IMPORTANT: If the following enum table is modified you must also update token_enum_map below. enum class parse_token_type_t : uint8_t { invalid = 1, - - // Terminal types. string, pipe, redirection, @@ -50,37 +45,14 @@ enum class parse_token_type_t : uint8_t { andand, oror, end, - // Special terminal type that means no more tokens forthcoming. terminate, - // Very special terminal types that don't appear in the production list. 
error, tokenizer_error, comment, }; -const enum_map token_enum_map[] = { - {parse_token_type_t::comment, L"parse_token_type_t::comment"}, - {parse_token_type_t::error, L"parse_token_type_t::error"}, - {parse_token_type_t::tokenizer_error, L"parse_token_type_t::tokenizer_error"}, - {parse_token_type_t::background, L"parse_token_type_t::background"}, - {parse_token_type_t::end, L"parse_token_type_t::end"}, - {parse_token_type_t::pipe, L"parse_token_type_t::pipe"}, - {parse_token_type_t::redirection, L"parse_token_type_t::redirection"}, - {parse_token_type_t::string, L"parse_token_type_t::string"}, - {parse_token_type_t::andand, L"parse_token_type_t::andand"}, - {parse_token_type_t::oror, L"parse_token_type_t::oror"}, - {parse_token_type_t::terminate, L"parse_token_type_t::terminate"}, - {parse_token_type_t::invalid, L"parse_token_type_t::invalid"}, - {parse_token_type_t::invalid, nullptr}}; - -// IMPORTANT: If the following enum is modified you must update the corresponding keyword_enum_map -// array below. -// -// IMPORTANT: These enums must start at zero. enum class parse_keyword_t : uint8_t { - // 'none' is not a keyword, it is a sentinel indicating nothing. 
none, - kw_and, kw_begin, kw_builtin, @@ -101,28 +73,6 @@ enum class parse_keyword_t : uint8_t { kw_while, }; -const enum_map keyword_enum_map[] = {{parse_keyword_t::kw_exclam, L"!"}, - {parse_keyword_t::kw_and, L"and"}, - {parse_keyword_t::kw_begin, L"begin"}, - {parse_keyword_t::kw_builtin, L"builtin"}, - {parse_keyword_t::kw_case, L"case"}, - {parse_keyword_t::kw_command, L"command"}, - {parse_keyword_t::kw_else, L"else"}, - {parse_keyword_t::kw_end, L"end"}, - {parse_keyword_t::kw_exec, L"exec"}, - {parse_keyword_t::kw_for, L"for"}, - {parse_keyword_t::kw_function, L"function"}, - {parse_keyword_t::kw_if, L"if"}, - {parse_keyword_t::kw_in, L"in"}, - {parse_keyword_t::kw_not, L"not"}, - {parse_keyword_t::kw_or, L"or"}, - {parse_keyword_t::kw_switch, L"switch"}, - {parse_keyword_t::kw_time, L"time"}, - {parse_keyword_t::kw_while, L"while"}, - {parse_keyword_t::none, nullptr}}; -#define keyword_enum_map_len (sizeof keyword_enum_map / sizeof *keyword_enum_map) - -// Statement decorations like 'command' or 'exec'. enum class statement_decoration_t : uint8_t { none, command, @@ -130,47 +80,38 @@ enum class statement_decoration_t : uint8_t { exec, }; -// Parse error code list. -enum parse_error_code_t : uint8_t { - parse_error_none, - - // Matching values from enum parser_error. - parse_error_syntax, - parse_error_eval, - parse_error_cmdsubst, - - parse_error_generic, // unclassified error types - - // Tokenizer errors. 
- parse_error_tokenizer_unterminated_quote, - parse_error_tokenizer_unterminated_subshell, - parse_error_tokenizer_unterminated_slice, - parse_error_tokenizer_unterminated_escape, - parse_error_tokenizer_other, - - parse_error_unbalancing_end, // end outside of block - parse_error_unbalancing_else, // else outside of if - parse_error_unbalancing_case, // case outside of switch - parse_error_bare_variable_assignment, // a=b without command - parse_error_andor_in_pipeline, // "and" or "or" after a pipe +enum class parse_error_code_t : uint8_t { + none, + syntax, + cmdsubst, + generic, + tokenizer_unterminated_quote, + tokenizer_unterminated_subshell, + tokenizer_unterminated_slice, + tokenizer_unterminated_escape, + tokenizer_other, + unbalancing_end, + unbalancing_else, + unbalancing_case, + bare_variable_assignment, + andor_in_pipeline, }; +struct ParseErrorList; +using parse_error_list_t = ParseErrorList; + +#endif + +// Special source_start value that means unknown. +#define SOURCE_LOCATION_UNKNOWN (static_cast(-1)) + enum { parse_flag_none = 0, - - /// Attempt to build a "parse tree" no matter what. This may result in a 'forest' of - /// disconnected trees. This is intended to be used by syntax highlighting. parse_flag_continue_after_error = 1 << 0, - /// Include comment tokens. parse_flag_include_comments = 1 << 1, - /// Indicate that the tokenizer should accept incomplete tokens */ parse_flag_accept_incomplete_tokens = 1 << 2, - /// Indicate that the parser should not generate the terminate token, allowing an 'unfinished' - /// tree where some nodes may have no productions. parse_flag_leave_unterminated = 1 << 3, - /// Indicate that the parser should generate job_list entries for blank lines. parse_flag_show_blank_lines = 1 << 4, - /// Indicate that extra semis should be generated. 
parse_flag_show_extra_semis = 1 << 5, }; using parse_tree_flags_t = uint8_t; @@ -178,41 +119,6 @@ using parse_tree_flags_t = uint8_t; enum { PARSER_TEST_ERROR = 1, PARSER_TEST_INCOMPLETE = 2 }; using parser_test_error_bits_t = uint8_t; -struct parse_error_t { - /// Text of the error. - wcstring text; - /// Code for the error. - enum parse_error_code_t code; - /// Offset and length of the token in the source code that triggered this error. - size_t source_start; - size_t source_length; - /// Return a string describing the error, suitable for presentation to the user. If - /// is_interactive is true, the offending line with a caret is printed as well. - wcstring describe(const wcstring &src, bool is_interactive) const; - /// Return a string describing the error, suitable for presentation to the user, with the given - /// prefix. If skip_caret is false, the offending line with a caret is printed as well. - wcstring describe_with_prefix(const wcstring &src, const wcstring &prefix, bool is_interactive, - bool skip_caret) const; -}; -typedef std::vector parse_error_list_t; - -wcstring token_type_user_presentable_description(parse_token_type_t type, - parse_keyword_t keyword = parse_keyword_t::none); - -// Special source_start value that means unknown. -#define SOURCE_LOCATION_UNKNOWN (static_cast(-1)) - -/// Helper function to offset error positions by the given amount. This is used when determining -/// errors in a substring of a larger source buffer. -void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt); - -// The location of a pipeline. -enum class pipeline_position_t : uint8_t { - none, // not part of a pipeline - first, // first command in a pipeline - subsequent // second or further command in a pipeline -}; - /// Maximum number of function calls. 
#define FISH_MAX_STACK_DEPTH 128 diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index 105009511..d51ee1cde 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -39,7 +39,7 @@ #include "path.h" #include "proc.h" #include "reader.h" -#include "timer.h" +#include "timer.rs.h" #include "tokenizer.h" #include "trace.h" #include "wildcard.h" @@ -112,9 +112,9 @@ static wcstring profiling_cmd_name_for_redirectable_block(const ast::node_t &nod } /// Get a redirection from stderr to stdout (i.e. 2>&1). -static redirection_spec_t get_stderr_merge() { +static rust::Box get_stderr_merge() { const wchar_t *stdout_fileno_str = L"1"; - return redirection_spec_t{STDERR_FILENO, redirection_mode_t::fd, stdout_fileno_str}; + return new_redirection_spec(STDERR_FILENO, redirection_mode_t::fd, stdout_fileno_str); } parse_execution_context_t::parse_execution_context_t(parsed_source_ref_t pstree, @@ -150,7 +150,7 @@ parse_execution_context_t::infinite_recursive_statement_in_job_list(const ast::j // Get the first job in the job list. const ast::job_conjunction_t *jc = jobs.at(0); if (!jc) return nullptr; - const ast::job_t *job = &jc->job; + const ast::job_pipeline_t *job = &jc->job; // Helper to return if a statement is infinitely recursive in this function. auto statement_recurses = @@ -245,7 +245,7 @@ maybe_t parse_execution_context_t::check_end_execution() } /// Return whether the job contains a single statement, of block type, with no redirections. -bool parse_execution_context_t::job_is_simple_block(const ast::job_t &job) const { +bool parse_execution_context_t::job_is_simple_block(const ast::job_pipeline_t &job) const { using namespace ast; // Must be no pipes. 
if (!job.continuation.empty()) { @@ -450,7 +450,8 @@ end_execution_reason_t parse_execution_context_t::run_for_statement( auto var = parser->vars().get(for_var_name, ENV_DEFAULT); if (env_var_t::flags_for(for_var_name.c_str()) & env_var_t::flag_read_only) { return report_error(STATUS_INVALID_ARGS, header.var_name, - _(L"%ls: %ls: cannot overwrite read-only variable"), L"for", for_var_name.c_str()); + _(L"%ls: %ls: cannot overwrite read-only variable"), L"for", + for_var_name.c_str()); } auto &vars = parser->vars(); @@ -502,14 +503,14 @@ end_execution_reason_t parse_execution_context_t::run_switch_statement( // Expand it. We need to offset any errors by the position of the string. completion_list_t switch_values_expanded; - parse_error_list_t errors; + auto errors = new_parse_error_list(); auto expand_ret = - expand_string(switch_value, &switch_values_expanded, expand_flags_t{}, ctx, &errors); - parse_error_offset_source_start(&errors, statement.argument.range.start); + expand_string(switch_value, &switch_values_expanded, expand_flags_t{}, ctx, &*errors); + errors->offset_source_start(statement.argument.range.start); switch (expand_ret.result) { case expand_result_t::error: - return report_errors(expand_ret.status, errors); + return report_errors(expand_ret.status, *errors); case expand_result_t::cancel: return end_execution_reason_t::cancelled; @@ -665,18 +666,20 @@ end_execution_reason_t parse_execution_context_t::report_error(int status, const auto r = node.source_range(); // Create an error. 
- parse_error_list_t error_list = parse_error_list_t(1); - parse_error_t *error = &error_list.at(0); - error->source_start = r.start; - error->source_length = r.length; - error->code = parse_error_syntax; // hackish + auto error_list = new_parse_error_list(); + parse_error_t error; + error.source_start = r.start; + error.source_length = r.length; + error.code = parse_error_code_t::syntax; // hackish va_list va; va_start(va, fmt); - error->text = vformat_string(fmt, va); + error.text = std::make_unique(vformat_string(fmt, va)); va_end(va); - return this->report_errors(status, error_list); + error_list->push_back(std::move(error)); + + return this->report_errors(status, *error_list); } end_execution_reason_t parse_execution_context_t::report_errors( @@ -735,19 +738,20 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found( // If the original command did not include a "/", assume we found it via $PATH. auto src = get_source(statement.command); if (src.find(L"/") == wcstring::npos) { - return this->report_error( - STATUS_NOT_EXECUTABLE, statement.command, - _(L"Unknown command. A component of '%ls' is not a directory. Check your $PATH."), cmd); + return this->report_error(STATUS_NOT_EXECUTABLE, statement.command, + _(L"Unknown command. A component of '%ls' is not a " + L"directory. Check your $PATH."), + cmd); } else { return this->report_error( - STATUS_NOT_EXECUTABLE, statement.command, - _(L"Unknown command. A component of '%ls' is not a directory."), cmd); + STATUS_NOT_EXECUTABLE, statement.command, + _(L"Unknown command. A component of '%ls' is not a directory."), cmd); } } return this->report_error( - STATUS_NOT_EXECUTABLE, statement.command, - _(L"Unknown command. '%ls' exists but is not an executable file."), cmd); + STATUS_NOT_EXECUTABLE, statement.command, + _(L"Unknown command. 
'%ls' exists but is not an executable file."), cmd); } // Handle unrecognized commands with standard command not found handler that can make better @@ -770,7 +774,9 @@ end_execution_reason_t parse_execution_context_t::handle_command_not_found( // Redirect to stderr auto io = io_chain_t{}; - io.append_from_specs({redirection_spec_t{STDOUT_FILENO, redirection_mode_t::fd, L"2"}}, L""); + auto list = new_redirection_spec_list(); + list->push_back(new_redirection_spec(STDOUT_FILENO, redirection_mode_t::fd, L"2")); + io.append_from_specs(*list, L""); if (function_exists(L"fish_command_not_found", *parser)) { buffer = L"fish_command_not_found"; @@ -810,7 +816,7 @@ end_execution_reason_t parse_execution_context_t::expand_command( // Here we're expanding a command, for example $HOME/bin/stuff or $randomthing. The first // completion becomes the command itself, everything after becomes arguments. Command // substitutions are not supported. - parse_error_list_t errors; + auto errors = new_parse_error_list(); // Get the unexpanded command string. We expect to always get it here. wcstring unexp_cmd = get_source(statement.command); @@ -818,14 +824,14 @@ end_execution_reason_t parse_execution_context_t::expand_command( // Expand the string to produce completions, and report errors. expand_result_t expand_err = - expand_to_command_and_args(unexp_cmd, ctx, out_cmd, out_args, &errors); + expand_to_command_and_args(unexp_cmd, ctx, out_cmd, out_args, &*errors); if (expand_err == expand_result_t::error) { // Issue #5812 - the expansions were done on the command token, // excluding prefixes such as " " or "if ". // This means that the error positions are relative to the beginning // of the token; we need to make them relative to the original source. 
- parse_error_offset_source_start(&errors, pos_of_command_token); - return report_errors(STATUS_ILLEGAL_CMD, errors); + errors->offset_source_start(pos_of_command_token); + return report_errors(STATUS_ILLEGAL_CMD, *errors); } else if (expand_err == expand_result_t::wildcard_no_match) { return report_error(STATUS_UNMATCHED_WILDCARD, statement, WILDCARD_ERR_MSG, get_source(statement).c_str()); @@ -890,7 +896,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( // Produce the full argument list and the set of IO redirections. wcstring_list_t cmd_args; - redirection_spec_list_t redirections; + auto redirections = new_redirection_spec_list(); if (use_implicit_cd) { // Implicit cd is simple. cmd_args = {L"cd", cmd}; @@ -917,7 +923,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( } // The set of IO redirections that we construct for the process. - auto reason = this->determine_redirections(statement.args_or_redirs, &redirections); + auto reason = this->determine_redirections(statement.args_or_redirs, &*redirections); if (reason != end_execution_reason_t::ok) { return reason; } @@ -945,14 +951,14 @@ end_execution_reason_t parse_execution_context_t::expand_arguments_from_nodes( assert(arg_node->has_source() && "Argument should have source"); // Expand this string. 
- parse_error_list_t errors; + auto errors = new_parse_error_list(); arg_expanded.clear(); auto expand_ret = - expand_string(get_source(*arg_node), &arg_expanded, expand_flags_t{}, ctx, &errors); - parse_error_offset_source_start(&errors, arg_node->range.start); + expand_string(get_source(*arg_node), &arg_expanded, expand_flags_t{}, ctx, &*errors); + errors->offset_source_start(arg_node->range.start); switch (expand_ret.result) { case expand_result_t::error: { - return this->report_errors(expand_ret.status, errors); + return this->report_errors(expand_ret.status, *errors); } case expand_result_t::cancel: { @@ -999,7 +1005,7 @@ end_execution_reason_t parse_execution_context_t::determine_redirections( if (!arg_or_redir.is_redirection()) continue; const ast::redirection_t &redir_node = arg_or_redir.redirection(); - maybe_t oper = pipe_or_redir_t::from_string(get_source(redir_node.oper)); + auto oper = pipe_or_redir_from_string(get_source(redir_node.oper).c_str()); if (!oper || !oper->is_valid()) { // TODO: figure out if this can ever happen. If so, improve this error message. return report_error(STATUS_INVALID_ARGS, redir_node, _(L"Invalid redirection: %ls"), @@ -1018,14 +1024,14 @@ end_execution_reason_t parse_execution_context_t::determine_redirections( // Make a redirection spec from the redirect token. assert(oper && oper->is_valid() && "expected to have a valid redirection"); - redirection_spec_t spec{oper->fd, oper->mode, std::move(target)}; + auto spec = new_redirection_spec(oper->fd, oper->mode, target.c_str()); // Validate this spec. 
- if (spec.mode == redirection_mode_t::fd && !spec.is_close() && - !spec.get_target_as_fd().has_value()) { + if (spec->mode() == redirection_mode_t::fd && !spec->is_close() && + !spec->get_target_as_fd()) { const wchar_t *fmt = _(L"Requested redirection to '%ls', which is not a valid file descriptor"); - return report_error(STATUS_INVALID_ARGS, redir_node, fmt, spec.target.c_str()); + return report_error(STATUS_INVALID_ARGS, redir_node, fmt, spec->target()->c_str()); } out_redirections->push_back(std::move(spec)); @@ -1077,8 +1083,8 @@ end_execution_reason_t parse_execution_context_t::populate_block_process( } assert(args_or_redirs && "Should have args_or_redirs"); - redirection_spec_list_t redirections; - auto reason = this->determine_redirections(*args_or_redirs, &redirections); + auto redirections = new_redirection_spec_list(); + auto reason = this->determine_redirections(*args_or_redirs, &*redirections); if (reason == end_execution_reason_t::ok) { proc->type = process_type_t::block_node; proc->block_node_source = pstree; @@ -1096,18 +1102,18 @@ end_execution_reason_t parse_execution_context_t::apply_variable_assignments( for (const ast::variable_assignment_t &variable_assignment : variable_assignment_list) { const wcstring &source = get_source(variable_assignment); auto equals_pos = variable_assignment_equals_pos(source); - assert(equals_pos.has_value()); + assert(equals_pos); const wcstring variable_name = source.substr(0, *equals_pos); const wcstring expression = source.substr(*equals_pos + 1); completion_list_t expression_expanded; - parse_error_list_t errors; + auto errors = new_parse_error_list(); // TODO this is mostly copied from expand_arguments_from_nodes, maybe extract to function auto expand_ret = - expand_string(expression, &expression_expanded, expand_flags_t{}, ctx, &errors); - parse_error_offset_source_start(&errors, variable_assignment.range.start + *equals_pos + 1); + expand_string(expression, &expression_expanded, expand_flags_t{}, ctx, 
&*errors); + errors->offset_source_start(variable_assignment.range.start + *equals_pos + 1); switch (expand_ret.result) { case expand_result_t::error: - return this->report_errors(expand_ret.status, errors); + return this->report_errors(expand_ret.status, *errors); case expand_result_t::cancel: return end_execution_reason_t::cancelled; @@ -1180,7 +1186,7 @@ end_execution_reason_t parse_execution_context_t::populate_job_process( } end_execution_reason_t parse_execution_context_t::populate_job_from_job_node( - job_t *j, const ast::job_t &job_node, const block_t *associated_block) { + job_t *j, const ast::job_pipeline_t &job_node, const block_t *associated_block) { UNUSED(associated_block); // We are going to construct process_t structures for every statement in the job. @@ -1196,8 +1202,8 @@ end_execution_reason_t parse_execution_context_t::populate_job_from_job_node( break; } // Handle the pipe, whose fd may not be the obvious stdout. - auto parsed_pipe = pipe_or_redir_t::from_string(get_source(jc.pipe)); - assert(parsed_pipe.has_value() && parsed_pipe->is_pipe && "Failed to parse valid pipe"); + auto parsed_pipe = pipe_or_redir_from_string(get_source(jc.pipe).c_str()); + assert(parsed_pipe && parsed_pipe->is_pipe && "Failed to parse valid pipe"); if (!parsed_pipe->is_valid()) { result = report_error(STATUS_INVALID_ARGS, jc.pipe, ILLEGAL_FD_ERR_MSG, get_source(jc.pipe).c_str()); @@ -1207,8 +1213,8 @@ end_execution_reason_t parse_execution_context_t::populate_job_from_job_node( if (parsed_pipe->stderr_merge) { // This was a pipe like &| which redirects both stdout and stderr. // Also redirect stderr to stdout. 
- auto specs = processes.back()->redirection_specs(); - specs.push_back(get_stderr_merge()); + auto specs = processes.back()->redirection_specs().clone(); + specs->push_back(get_stderr_merge()); processes.back()->set_redirection_specs(std::move(specs)); } @@ -1244,7 +1250,7 @@ static bool remove_job(parser_t &parser, const job_t *job) { /// For historical reasons the 'not' and 'time' prefix are "inside out". That is, it's /// 'not time cmd'. Note that a time appearing anywhere in the pipeline affects the whole job. /// `sleep 1 | not time true` will time the whole job! -static bool job_node_wants_timing(const ast::job_t &job_node) { +static bool job_node_wants_timing(const ast::job_pipeline_t &job_node) { // Does our job have the job-level time prefix? if (job_node.time) return true; @@ -1266,7 +1272,7 @@ static bool job_node_wants_timing(const ast::job_t &job_node) { return false; } -end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_t &job_node, +end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_pipeline_t &job_node, const block_t *associated_block) { if (auto ret = check_end_execution()) { return *ret; @@ -1288,7 +1294,7 @@ end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_t &jo scoped_push saved_eval_level(&parser->eval_level, parser->eval_level + 1); // Save the node index. - scoped_push saved_node(&executing_job_node, &job_node); + scoped_push saved_node(&executing_job_node, &job_node); // Profiling support. profile_item_t *profile_item = this->parser->create_profile_item(); @@ -1301,7 +1307,7 @@ end_execution_reason_t parse_execution_context_t::run_1_job(const ast::job_t &jo if (job_is_simple_block(job_node)) { bool do_time = job_node.time.has_value(); // If no-exec has been given, there is nothing to time. 
- cleanup_t timer = push_timer(do_time && !no_exec()); + auto timer = push_timer(do_time && !no_exec()); const block_t *block = nullptr; end_execution_reason_t result = this->apply_variable_assignments(nullptr, job_node.variables, &block); @@ -1577,7 +1583,7 @@ bool parse_execution_context_t::use_job_control() const { DIE("Unreachable"); } -int parse_execution_context_t::line_offset_of_node(const ast::job_t *node) { +int parse_execution_context_t::line_offset_of_node(const ast::job_pipeline_t *node) { // If we're not executing anything, return -1. if (!node) { return -1; diff --git a/src/parse_execution.h b/src/parse_execution.h index 8b1d78aba..34553c8f5 100644 --- a/src/parse_execution.h +++ b/src/parse_execution.h @@ -47,7 +47,7 @@ class parse_execution_context_t : noncopyable_t { int cancel_signal{0}; // The currently executing job node, used to indicate the line number. - const ast::job_t *executing_job_node{}; + const ast::job_pipeline_t *executing_job_node{}; // Cached line number information. size_t cached_lineno_offset = 0; @@ -84,7 +84,7 @@ class parse_execution_context_t : noncopyable_t { wcstring *out_cmd, wcstring_list_t *out_args) const; /// Indicates whether a job is a simple block (one block, no redirections). 
- bool job_is_simple_block(const ast::job_t &job) const; + bool job_is_simple_block(const ast::job_pipeline_t &job) const; enum process_type_t process_type_for_command(const ast::decorated_statement_t &statement, const wcstring &cmd) const; @@ -135,7 +135,7 @@ class parse_execution_context_t : noncopyable_t { end_execution_reason_t determine_redirections(const ast::argument_or_redirection_list_t &list, redirection_spec_list_t *out_redirections); - end_execution_reason_t run_1_job(const ast::job_t &job, const block_t *associated_block); + end_execution_reason_t run_1_job(const ast::job_pipeline_t &job, const block_t *associated_block); end_execution_reason_t test_and_run_1_job_conjunction(const ast::job_conjunction_t &jc, const block_t *associated_block); end_execution_reason_t run_job_conjunction(const ast::job_conjunction_t &job_expr, @@ -144,7 +144,7 @@ class parse_execution_context_t : noncopyable_t { const block_t *associated_block); end_execution_reason_t run_job_list(const ast::andor_job_list_t &job_list_node, const block_t *associated_block); - end_execution_reason_t populate_job_from_job_node(job_t *j, const ast::job_t &job_node, + end_execution_reason_t populate_job_from_job_node(job_t *j, const ast::job_pipeline_t &job_node, const block_t *associated_block); // Assign a job group to the given job. @@ -154,7 +154,7 @@ class parse_execution_context_t : noncopyable_t { bool use_job_control() const; // Returns the line number of the node. Not const since it touches cached_lineno_offset. 
- int line_offset_of_node(const ast::job_t *node); + int line_offset_of_node(const ast::job_pipeline_t *node); int line_offset_of_character_at_offset(size_t offset); public: diff --git a/src/parse_tree.cpp b/src/parse_tree.cpp index 2590fafc2..3942f6e4d 100644 --- a/src/parse_tree.cpp +++ b/src/parse_tree.cpp @@ -20,194 +20,17 @@ parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err) { switch (err) { case tokenizer_error_t::none: - return parse_error_none; + return parse_error_code_t::none; case tokenizer_error_t::unterminated_quote: - return parse_error_tokenizer_unterminated_quote; + return parse_error_code_t::tokenizer_unterminated_quote; case tokenizer_error_t::unterminated_subshell: - return parse_error_tokenizer_unterminated_subshell; + return parse_error_code_t::tokenizer_unterminated_subshell; case tokenizer_error_t::unterminated_slice: - return parse_error_tokenizer_unterminated_slice; + return parse_error_code_t::tokenizer_unterminated_slice; case tokenizer_error_t::unterminated_escape: - return parse_error_tokenizer_unterminated_escape; + return parse_error_code_t::tokenizer_unterminated_escape; default: - return parse_error_tokenizer_other; - } -} - -/// Returns a string description of this parse error. -wcstring parse_error_t::describe_with_prefix(const wcstring &src, const wcstring &prefix, - bool is_interactive, bool skip_caret) const { - wcstring result = prefix; - // Some errors don't have their message passed in, so we construct them here. - // This affects e.g. 
`eval "a=(foo)"` - switch (code) { - default: - if (skip_caret && this->text.empty()) return L""; - result.append(this->text); - break; - case parse_error_andor_in_pipeline: - append_format(result, INVALID_PIPELINE_CMD_ERR_MSG, - src.substr(this->source_start, this->source_length).c_str()); - break; - case parse_error_bare_variable_assignment: { - wcstring assignment_src = src.substr(this->source_start, this->source_length); - maybe_t equals_pos = variable_assignment_equals_pos(assignment_src); - assert(equals_pos.has_value()); - wcstring variable = assignment_src.substr(0, *equals_pos); - wcstring value = assignment_src.substr(*equals_pos + 1); - append_format(result, ERROR_BAD_COMMAND_ASSIGN_ERR_MSG, variable.c_str(), - value.c_str()); - break; - } - } - - size_t start = source_start; - size_t len = source_length; - if (start >= src.size()) { - // If we are past the source, we clamp it to the end. - start = src.size() - 1; - len = 0; - } - - if (start + len > src.size()) { - len = src.size() - source_start; - } - - if (skip_caret) { - return result; - } - - // Locate the beginning of this line of source. - size_t line_start = 0; - - // Look for a newline prior to source_start. If we don't find one, start at the beginning of - // the string; otherwise start one past the newline. Note that source_start may itself point - // at a newline; we want to find the newline before it. - if (start > 0) { - size_t newline = src.find_last_of(L'\n', start - 1); - if (newline != wcstring::npos) { - line_start = newline + 1; - } - } - // Look for the newline after the source range. If the source range itself includes a - // newline, that's the one we want, so start just before the end of the range. - size_t last_char_in_range = (len == 0 ? 
start : start + len - 1); - size_t line_end = src.find(L'\n', last_char_in_range); - if (line_end == wcstring::npos) { - line_end = src.size(); - } - - assert(line_end >= line_start); - assert(start >= line_start); - - // Don't include the caret and line if we're interactive and this is the first line, because - // then it's obvious. - bool interactive_skip_caret = is_interactive && start == 0; - if (interactive_skip_caret) { - return result; - } - - // Append the line of text. - if (!result.empty()) result.push_back(L'\n'); - result.append(src, line_start, line_end - line_start); - - // Append the caret line. The input source may include tabs; for that reason we - // construct a "caret line" that has tabs in corresponding positions. - wcstring caret_space_line; - caret_space_line.reserve(start - line_start); - for (size_t i = line_start; i < start; i++) { - wchar_t wc = src.at(i); - if (wc == L'\t') { - caret_space_line.push_back(L'\t'); - } else if (wc == L'\n') { - // It's possible that the start points at a newline itself. In that case, - // pretend it's a space. We only expect this to be at the end of the string. - caret_space_line.push_back(L' '); - } else { - int width = fish_wcwidth(wc); - if (width > 0) { - caret_space_line.append(static_cast(width), L' '); - } - } - } - result.push_back(L'\n'); - result.append(caret_space_line); - result.push_back(L'^'); - if (len > 1) { - // Add a squiggle under the error location. - // We do it like this - // ^~~^ - // With a "^" under the start and end, and squiggles in-between. - auto width = fish_wcswidth(src.c_str() + start, len); - if (width >= 2) { - // Subtract one for each of the carets - this is important in case - // the starting char has a width of > 1. 
- result.append(width - 2, L'~'); - result.push_back(L'^'); - } - } - return result; -} - -wcstring parse_error_t::describe(const wcstring &src, bool is_interactive) const { - return this->describe_with_prefix(src, wcstring(), is_interactive, false); -} - -void parse_error_offset_source_start(parse_error_list_t *errors, size_t amt) { - if (amt > 0 && errors != nullptr) { - for (parse_error_t &error : *errors) { - // Preserve the special meaning of -1 as 'unknown'. - if (error.source_start != SOURCE_LOCATION_UNKNOWN) { - error.source_start += amt; - } - } - } -} - -/// Returns a string description for the given token type. -const wchar_t *token_type_description(parse_token_type_t type) { - const wchar_t *description = enum_to_str(type, token_enum_map); - if (description) return description; - return L"unknown_token_type"; -} - -const wchar_t *keyword_description(parse_keyword_t type) { - const wchar_t *keyword = enum_to_str(type, keyword_enum_map); - if (keyword) return keyword; - return L"unknown_keyword"; -} - -wcstring token_type_user_presentable_description(parse_token_type_t type, parse_keyword_t keyword) { - if (keyword != parse_keyword_t::none) { - return format_string(L"keyword '%ls'", keyword_description(keyword)); - } - - switch (type) { - case parse_token_type_t::string: - return L"a string"; - case parse_token_type_t::pipe: - return L"a pipe"; - case parse_token_type_t::redirection: - return L"a redirection"; - case parse_token_type_t::background: - return L"a '&'"; - case parse_token_type_t::andand: - return L"'&&'"; - case parse_token_type_t::oror: - return L"'||'"; - case parse_token_type_t::end: - return L"end of the statement"; - case parse_token_type_t::terminate: - return L"end of the input"; - case parse_token_type_t::error: - return L"a parse error"; - case parse_token_type_t::tokenizer_error: - return L"an incomplete token"; - case parse_token_type_t::comment: - return L"a comment"; - default: { - return format_string(L"a %ls", 
token_type_description(type)); - } + return parse_error_code_t::tokenizer_other; } } @@ -222,7 +45,7 @@ wcstring parse_token_t::describe() const { /// A string description appropriate for presentation to the user. wcstring parse_token_t::user_presentable_description() const { - return token_type_user_presentable_description(type, keyword); + return *token_type_user_presentable_description(type, keyword); } parsed_source_t::parsed_source_t(wcstring &&s, ast::ast_t &&ast) diff --git a/src/parse_tree.h b/src/parse_tree.h index 95ae4b603..7814155e6 100644 --- a/src/parse_tree.h +++ b/src/parse_tree.h @@ -11,10 +11,8 @@ /// A struct representing the token type that we use internally. struct parse_token_t { - enum parse_token_type_t type; // The type of the token as represented by the parser - enum parse_keyword_t keyword { - parse_keyword_t::none - }; // Any keyword represented by this token + parse_token_type_t type; // The type of the token as represented by the parser + parse_keyword_t keyword{parse_keyword_t::none}; // Any keyword represented by this token bool has_dash_prefix{false}; // Hackish: whether the source contains a dash prefix bool is_help_argument{false}; // Hackish: whether the source looks like '-h' or '--help' bool is_newline{false}; // Hackish: if TOK_END, whether the source is a newline. @@ -39,9 +37,6 @@ struct parse_token_t { constexpr parse_token_t(parse_token_type_t type) : type(type) {} }; -const wchar_t *token_type_description(parse_token_type_t type); -const wchar_t *keyword_description(parse_keyword_t type); - parse_error_code_t parse_error_from_tokenizer_error(tokenizer_error_t err); /// A type wrapping up a parse tree and the original source behind it. 
diff --git a/src/parse_util.cpp b/src/parse_util.cpp index dc09d3071..404819742 100644 --- a/src/parse_util.cpp +++ b/src/parse_util.cpp @@ -178,7 +178,7 @@ static int parse_util_locate_cmdsub(const wchar_t *in, const wchar_t **begin, co } } } - is_token_begin = is_token_delimiter(pos[0], pos[1]); + is_token_begin = is_token_delimiter(pos[0], std::make_shared(pos[1])); } else { escaped = false; is_token_begin = false; @@ -367,12 +367,12 @@ static void job_or_process_extent(bool process, const wchar_t *buff, size_t curs if (b) *b = end; const wcstring buffcpy(begin, end); - tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS); - maybe_t token{}; - while ((token = tok.next()) && !finished) { + auto tok = new_tokenizer(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS); + std::unique_ptr token{}; + while ((token = tok->next()) && !finished) { size_t tok_begin = token->offset; - switch (token->type) { + switch (token->type_) { case token_type_t::pipe: { if (!process) { break; @@ -440,13 +440,13 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar const wcstring buffcpy = wcstring(cmdsubst_begin, cmdsubst_end - cmdsubst_begin); - tokenizer_t tok(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED); - while (maybe_t token = tok.next()) { + auto tok = new_tokenizer(buffcpy.c_str(), TOK_ACCEPT_UNFINISHED); + while (std::unique_ptr token = tok->next()) { size_t tok_begin = token->offset; size_t tok_end = tok_begin; // Calculate end of token. - if (token->type == token_type_t::string) { + if (token->type_ == token_type_t::string) { tok_end += token->length; } @@ -459,14 +459,14 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar // If cursor is inside the token, this is the token we are looking for. If so, set a and b // and break. 
- if (token->type == token_type_t::string && tok_end >= offset_within_cmdsubst) { + if (token->type_ == token_type_t::string && tok_end >= offset_within_cmdsubst) { a = cmdsubst_begin + token->offset; b = a + token->length; break; } // Remember previous string token. - if (token->type == token_type_t::string) { + if (token->type_ == token_type_t::string) { pa = cmdsubst_begin + token->offset; pb = pa + token->length; } @@ -486,7 +486,7 @@ void parse_util_token_extent(const wchar_t *buff, size_t cursor_pos, const wchar wcstring parse_util_unescape_wildcards(const wcstring &str) { wcstring result; result.reserve(str.size()); - bool unesc_qmark = !feature_test(features_t::qmark_noglob); + bool unesc_qmark = !feature_test(feature_flag_t::qmark_noglob); const wchar_t *const cs = str.c_str(); for (size_t i = 0; cs[i] != L'\0'; i++) { @@ -541,11 +541,11 @@ static wchar_t get_quote(const wcstring &cmd_str, size_t len) { } wchar_t parse_util_get_quote_type(const wcstring &cmd, size_t pos) { - tokenizer_t tok(cmd.c_str(), TOK_ACCEPT_UNFINISHED); - while (auto token = tok.next()) { - if (token->type == token_type_t::string && + auto tok = new_tokenizer(cmd.c_str(), TOK_ACCEPT_UNFINISHED); + while (auto token = tok->next()) { + if (token->type_ == token_type_t::string && token->location_in_or_at_end_of_source_range(pos)) { - return get_quote(tok.text_of(*token), pos - token->offset); + return get_quote(*tok->text_of(*token), pos - token->offset); } } return L'\0'; @@ -815,11 +815,11 @@ static bool append_syntax_error(parse_error_list_t *errors, size_t source_locati parse_error_t error; error.source_start = source_location; error.source_length = source_length; - error.code = parse_error_syntax; + error.code = parse_error_code_t::syntax; va_list va; va_start(va, fmt); - error.text = vformat_string(fmt, va); + error.text = std::make_unique(vformat_string(fmt, va)); va_end(va); errors->push_back(std::move(error)); @@ -965,13 +965,13 @@ parser_test_error_bits_t 
parse_util_detect_errors_in_argument(const ast::argumen if (out_errors) { const wchar_t *fmt = L"Invalid token '%ls'"; if (arg_src.length() == 2 && arg_src[0] == L'\\' && - (arg_src[1] == L'c' || towlower(arg_src[1]) == L'u' - || towlower(arg_src[1]) == L'x')) { + (arg_src[1] == L'c' || towlower(arg_src[1]) == L'u' || + towlower(arg_src[1]) == L'x')) { fmt = L"Incomplete escape sequence '%ls'"; } - append_syntax_error(out_errors, source_start + begin, end - begin, - fmt, arg_src.c_str()); + append_syntax_error(out_errors, source_start + begin, end - begin, fmt, + arg_src.c_str()); } return 1; } @@ -1031,17 +1031,17 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen err |= check_subtoken(checked, paren_begin - has_dollar); assert(paren_begin < paren_end && "Parens out of order?"); - parse_error_list_t subst_errors; - err |= parse_util_detect_errors(subst, &subst_errors); + auto subst_errors = new_parse_error_list(); + err |= parse_util_detect_errors(subst, &*subst_errors); // Our command substitution produced error offsets relative to its source. Tweak the // offsets of the errors in the command substitution to account for both its offset // within the string, and the offset of the node. size_t error_offset = paren_begin + 1 + source_start; - parse_error_offset_source_start(&subst_errors, error_offset); + subst_errors->offset_source_start(error_offset); if (out_errors != nullptr) { - out_errors->insert(out_errors->end(), subst_errors.begin(), subst_errors.end()); + out_errors->append(&*subst_errors); } checked = paren_end + 1; @@ -1058,7 +1058,7 @@ parser_test_error_bits_t parse_util_detect_errors_in_argument(const ast::argumen } /// Given that the job given by node should be backgrounded, return true if we detect any errors. 
-static bool detect_errors_in_backgrounded_job(const ast::job_t &job, +static bool detect_errors_in_backgrounded_job(const ast::job_pipeline_t &job, parse_error_list_t *parse_errors) { using namespace ast; auto source_range = job.try_source_range(); @@ -1127,10 +1127,10 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, const statement_t *st = dst.parent->as(); // Walk up to the job. - const ast::job_t *job = nullptr; + const ast::job_pipeline_t *job = nullptr; for (const node_t *cursor = st; job == nullptr; cursor = cursor->parent) { assert(cursor && "Reached root without finding a job"); - job = cursor->try_as(); + job = cursor->try_as(); } assert(job && "Should have found the job"); @@ -1185,9 +1185,9 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, // Check that we can expand the command. // Make a new error list so we can fix the offset for just those, then append later. wcstring command; - parse_error_list_t new_errors; + auto new_errors = new_parse_error_list(); if (expand_to_command_and_args(unexp_command, operation_context_t::empty(), &command, - nullptr, &new_errors, + nullptr, &*new_errors, true /* skip wildcards */) == expand_result_t::error) { errored = true; } @@ -1244,8 +1244,8 @@ static bool detect_errors_in_decorated_statement(const wcstring &buff_src, // The expansion errors here go from the *command* onwards, // so we need to offset them by the *command* offset, // excluding the decoration. 
- parse_error_offset_source_start(&new_errors, dst.command.source_range().start); - vec_append(*parse_errors, std::move(new_errors)); + new_errors->offset_source_start(dst.command.source_range().start); + parse_errors->append(&*new_errors); } } return errored; @@ -1304,7 +1304,7 @@ parser_test_error_bits_t parse_util_detect_errors(const ast::ast_t &ast, const w } else if (const argument_t *arg = node.try_as()) { const wcstring &arg_src = arg->source(buff_src, &storage); res |= parse_util_detect_errors_in_argument(*arg, arg_src, out_errors); - } else if (const ast::job_t *job = node.try_as()) { + } else if (const ast::job_pipeline_t *job = node.try_as()) { // Disallow background in the following cases: // // foo & ; and bar @@ -1352,18 +1352,19 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, // Parse the input string into an ast. Some errors are detected here. using namespace ast; - parse_error_list_t parse_errors; - auto ast = ast_t::parse(buff_src, parse_flags, &parse_errors); + auto parse_errors = new_parse_error_list(); + auto ast = ast_t::parse(buff_src, parse_flags, &*parse_errors); if (allow_incomplete) { // Issue #1238: If the only error was unterminated quote, then consider this to have parsed // successfully. - size_t idx = parse_errors.size(); + size_t idx = parse_errors->size(); while (idx--) { - if (parse_errors.at(idx).code == parse_error_tokenizer_unterminated_quote || - parse_errors.at(idx).code == parse_error_tokenizer_unterminated_subshell) { + if (parse_errors->at(idx)->code() == parse_error_code_t::tokenizer_unterminated_quote || + parse_errors->at(idx)->code() == + parse_error_code_t::tokenizer_unterminated_subshell) { // Remove this error, since we don't consider it a real error. 
has_unclosed_quote_or_subshell = true; - parse_errors.erase(parse_errors.begin() + idx); + parse_errors->erase(idx); } } } @@ -1376,8 +1377,8 @@ parser_test_error_bits_t parse_util_detect_errors(const wcstring &buff_src, } // Early parse error, stop here. - if (!parse_errors.empty()) { - if (out_errors) vec_append(*out_errors, std::move(parse_errors)); + if (!parse_errors->empty()) { + if (out_errors) out_errors->append(&*parse_errors); return PARSER_TEST_ERROR; } @@ -1390,24 +1391,24 @@ maybe_t parse_util_detect_errors_in_argument_list(const wcstring &arg_ // Helper to return a description of the first error. auto get_error_text = [&](const parse_error_list_t &errors) { assert(!errors.empty() && "Expected an error"); - return errors.at(0).describe_with_prefix(arg_list_src, prefix, false /* not interactive */, - false /* don't skip caret */); + return *errors.at(0)->describe_with_prefix( + arg_list_src, prefix, false /* not interactive */, false /* don't skip caret */); }; // Parse the string as a freestanding argument list. using namespace ast; - parse_error_list_t errors; - auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &errors); - if (!errors.empty()) { - return get_error_text(errors); + auto errors = new_parse_error_list(); + auto ast = ast_t::parse_argument_list(arg_list_src, parse_flag_none, &*errors); + if (!errors->empty()) { + return get_error_text(*errors); } // Get the root argument list and extract arguments from it. // Test each of these. 
for (const argument_t &arg : ast.top()->as()->arguments) { const wcstring arg_src = arg.source(arg_list_src); - if (parse_util_detect_errors_in_argument(arg, arg_src, &errors)) { - return get_error_text(errors); + if (parse_util_detect_errors_in_argument(arg, arg_src, &*errors)) { + return get_error_text(*errors); } } return none(); diff --git a/src/parse_util.h b/src/parse_util.h index bd318566b..54f492378 100644 --- a/src/parse_util.h +++ b/src/parse_util.h @@ -14,7 +14,8 @@ namespace ast { struct argument_t; class ast_t; } // namespace ast -struct tok_t; +struct Tok; +using tok_t = Tok; /// Handles slices: the square brackets in an expression like $foo[5..4] /// \return the length of the slice starting at \p in, or 0 if there is no slice, or -1 on error. diff --git a/src/parser.cpp b/src/parser.cpp index b9402fdd0..d43f16255 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -29,7 +29,7 @@ #include "parse_constants.h" #include "parse_execution.h" #include "proc.h" -#include "signal.h" +#include "signals.h" #include "wutil.h" // IWYU pragma: keep class io_chain_t; @@ -39,6 +39,14 @@ static wcstring user_presentable_path(const wcstring &path, const environment_t return replace_home_directory_with_tilde(path, vars); } +void library_data_t::set_exit_current_script(bool val) { + exit_current_script = val; +}; + +void library_data_t::set_returning(bool val) { + returning = val; +}; + parser_t::parser_t(std::shared_ptr vars, bool is_principal) : variables(std::move(vars)), is_principal_(is_principal) { assert(variables.get() && "Null variables in parser initializer"); @@ -439,10 +447,11 @@ wcstring parser_t::current_line() { // Use an error with empty text. 
assert(source_offset >= 0); parse_error_t empty_error = {}; + empty_error.text = std::make_unique(); empty_error.source_start = source_offset; - wcstring line_info = empty_error.describe_with_prefix(execution_context->get_source(), prefix, - is_interactive(), skip_caret); + wcstring line_info = *empty_error.describe_with_prefix(execution_context->get_source(), prefix, + is_interactive(), skip_caret); if (!line_info.empty()) { line_info.push_back(L'\n'); } @@ -454,7 +463,7 @@ wcstring parser_t::current_line() { void parser_t::job_add(shared_ptr job) { assert(job != nullptr); assert(!job->processes.empty()); - job_list.push_front(std::move(job)); + job_list.insert(job_list.begin(), std::move(job)); } void parser_t::job_promote(job_t *job) { @@ -499,13 +508,13 @@ profile_item_t *parser_t::create_profile_item() { eval_res_t parser_t::eval(const wcstring &cmd, const io_chain_t &io, const job_group_ref_t &job_group, enum block_type_t block_type) { // Parse the source into a tree, if we can. - parse_error_list_t error_list; - if (parsed_source_ref_t ps = parse_source(wcstring{cmd}, parse_flag_none, &error_list)) { + auto error_list = new_parse_error_list(); + if (parsed_source_ref_t ps = parse_source(wcstring{cmd}, parse_flag_none, &*error_list)) { return this->eval(ps, io, job_group, block_type); } else { // Get a backtrace. This includes the message. wcstring backtrace_and_desc; - this->get_backtrace(cmd, error_list, backtrace_and_desc); + this->get_backtrace(cmd, *error_list, backtrace_and_desc); // Print it. std::fwprintf(stderr, L"%ls\n", backtrace_and_desc.c_str()); @@ -623,20 +632,20 @@ template eval_res_t parser_t::eval_node(const parsed_source_ref_t &, const ast:: void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &errors, wcstring &output) const { if (!errors.empty()) { - const parse_error_t &err = errors.at(0); + const auto *err = errors.at(0); // Determine if we want to try to print a caret to point at the source error. 
The - // err.source_start <= src.size() check is due to the nasty way that slices work, which is + // err.source_start() <= src.size() check is due to the nasty way that slices work, which is // by rewriting the source. size_t which_line = 0; bool skip_caret = true; - if (err.source_start != SOURCE_LOCATION_UNKNOWN && err.source_start <= src.size()) { + if (err->source_start() != SOURCE_LOCATION_UNKNOWN && err->source_start() <= src.size()) { // Determine which line we're on. - which_line = 1 + std::count(src.begin(), src.begin() + err.source_start, L'\n'); + which_line = 1 + std::count(src.begin(), src.begin() + err->source_start(), L'\n'); // Don't include the caret if we're interactive, this is the first line of text, and our // source is at its beginning, because then it's obvious. - skip_caret = (is_interactive() && which_line == 1 && err.source_start == 0); + skip_caret = (is_interactive() && which_line == 1 && err->source_start() == 0); } wcstring prefix; @@ -655,7 +664,7 @@ void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &erro } const wcstring description = - err.describe_with_prefix(src, prefix, is_interactive(), skip_caret); + *err->describe_with_prefix(src, prefix, is_interactive(), skip_caret); if (!description.empty()) { output.append(description); output.push_back(L'\n'); @@ -664,6 +673,19 @@ void parser_t::get_backtrace(const wcstring &src, const parse_error_list_t &erro } } +RustFFIJobList parser_t::ffi_jobs() const { + return RustFFIJobList{const_cast(job_list.data()), job_list.size()}; +} + +bool parser_t::ffi_has_funtion_block() const { + for (const auto &b : blocks()) { + if (b.is_function_call()) { + return true; + } + } + return false; +} + block_t::block_t(block_type_t t) : block_type(t) {} wcstring block_t::description() const { diff --git a/src/parser.h b/src/parser.h index b1dfc0d51..9381426ab 100644 --- a/src/parser.h +++ b/src/parser.h @@ -13,6 +13,7 @@ #include #include "common.h" +#include "cxx.h" #include 
"env.h" #include "expand.h" #include "job_group.h" @@ -38,7 +39,7 @@ inline bool event_block_list_blocks_type(const event_blockage_list_t &ebls) { } /// Types of blocks. -enum class block_type_t : uint16_t { +enum class block_type_t : uint8_t { while_block, /// While loop block for_block, /// For loop block if_block, /// If block @@ -230,6 +231,9 @@ struct library_data_t { /// Used to get the full text of the current job for `status current-commandline`. wcstring commandline; } status_vars; + + void set_exit_current_script(bool val); + void set_returning(bool val); }; /// The result of parser_t::eval family. @@ -469,7 +473,10 @@ class parser_t : public std::enable_shared_from_this { std::shared_ptr shared(); /// \return a cancel poller for checking if this parser has been signalled. + /// autocxx falls over with this so hide it. +#if INCLUDE_RUST_HEADERS cancel_checker_t cancel_checker() const; +#endif /// \return the operation context for this parser. operation_context_t context(); @@ -477,6 +484,12 @@ class parser_t : public std::enable_shared_from_this { /// Checks if the max eval depth has been exceeded bool is_eval_depth_exceeded() const { return eval_level >= FISH_MAX_EVAL_DEPTH; } + /// autocxx junk. + RustFFIJobList ffi_jobs() const; + + /// autocxx junk. 
+ bool ffi_has_funtion_block() const; + ~parser_t(); }; diff --git a/src/postfork.cpp b/src/postfork.cpp index a2884eb33..570bcd7a5 100644 --- a/src/postfork.cpp +++ b/src/postfork.cpp @@ -24,7 +24,7 @@ #include "postfork.h" #include "proc.h" #include "redirection.h" -#include "signal.h" +#include "signals.h" #include "wutil.h" // IWYU pragma: keep #ifndef JOIN_THREADS_BEFORE_FORK diff --git a/src/postfork.h b/src/postfork.h index 5c09065c1..cc2eb59c5 100644 --- a/src/postfork.h +++ b/src/postfork.h @@ -15,7 +15,8 @@ #include "common.h" #include "maybe.h" -class dup2_list_t; +struct Dup2List; +using dup2_list_t = Dup2List; class job_t; class process_t; diff --git a/src/proc.cpp b/src/proc.cpp index 8be091d50..c9d9dd318 100644 --- a/src/proc.cpp +++ b/src/proc.cpp @@ -45,7 +45,7 @@ #include "parser.h" #include "proc.h" #include "reader.h" -#include "signal.h" +#include "signals.h" #include "wutil.h" // IWYU pragma: keep /// The signals that signify crashes to us. @@ -170,11 +170,17 @@ maybe_t job_t::get_statuses() const { return st; } +const process_list_t &job_t::get_processes() const { return processes; } + +RustFFIProcList job_t::ffi_processes() const { + return RustFFIProcList{const_cast(processes.data()), processes.size()}; +} + void internal_proc_t::mark_exited(proc_status_t status) { assert(!exited() && "Process is already exited"); status_.store(status, std::memory_order_relaxed); exited_.store(true, std::memory_order_release); - topic_monitor_t::principal().post(topic_t::internal_exit); + topic_monitor_principal().post(topic_t::internal_exit); FLOG(proc_internal_proc, L"Internal proc", internal_proc_id_, L"exited with status", status.status_value()); } @@ -245,10 +251,10 @@ static void handle_child_status(const shared_ptr &job, process_t *proc, } } -process_t::process_t() = default; +process_t::process_t() : proc_redirection_specs_(new_redirection_spec_list()) {} void process_t::check_generations_before_launch() { - gens_ = 
topic_monitor_t::principal().current_generations(); + gens_ = topic_monitor_principal().current_generations(); } void process_t::mark_aborted_before_launch() { @@ -362,7 +368,7 @@ static void process_mark_finished_children(parser_t &parser, bool block_ok) { // The exit generation tells us if we have an exit; the signal generation allows for detecting // SIGHUP and SIGINT. // Go through each process and figure out if and how it wants to be reaped. - generation_list_t reapgens = generation_list_t::invalids(); + generation_list_t reapgens = invalid_generations(); for (const auto &j : parser.jobs()) { for (const auto &proc : j->processes) { if (!j->can_reap(proc)) continue; @@ -381,7 +387,7 @@ static void process_mark_finished_children(parser_t &parser, bool block_ok) { } // Now check for changes, optionally waiting. - if (!topic_monitor_t::principal().check(&reapgens, block_ok)) { + if (!topic_monitor_principal().check(&reapgens, block_ok)) { // Nothing changed. return; } diff --git a/src/proc.h b/src/proc.h index 63ec2ccd9..1846d9ebd 100644 --- a/src/proc.h +++ b/src/proc.h @@ -94,8 +94,9 @@ class proc_status_t { /// Construct directly from an exit code. static proc_status_t from_exit_code(int ret) { - assert(ret >= 0 && "trying to create proc_status_t from failed wait{,id,pid}() call" - " or invalid builtin exit code!"); + assert(ret >= 0 && + "trying to create proc_status_t from failed wait{,id,pid}() call" + " or invalid builtin exit code!"); // Some paranoia. constexpr int zerocode = w_exitcode(0, 0); @@ -274,9 +275,9 @@ class process_t : noncopyable_t { const wchar_t *argv0() const { return argv_.empty() ? nullptr : argv_.front().c_str(); } /// Redirection list getter and setter. 
- const redirection_spec_list_t &redirection_specs() const { return proc_redirection_specs_; } + const redirection_spec_list_t &redirection_specs() const { return *proc_redirection_specs_; } - void set_redirection_specs(redirection_spec_list_t specs) { + void set_redirection_specs(rust::Box specs) { this->proc_redirection_specs_ = std::move(specs); } @@ -339,7 +340,7 @@ class process_t : noncopyable_t { private: wcstring_list_t argv_; - redirection_spec_list_t proc_redirection_specs_; + rust::Box proc_redirection_specs_; // The wait handle. This is constructed lazily, and cached. wait_handle_ref_t wait_handle_{}; @@ -349,6 +350,11 @@ using process_ptr_t = std::unique_ptr; using process_list_t = std::vector; class parser_t; +struct RustFFIProcList { + process_ptr_t *procs; + size_t count; +}; + /// A struct representing a job. A job is a pipeline of one or more processes. class job_t : noncopyable_t { public: @@ -383,6 +389,9 @@ class job_t : noncopyable_t { job_t(const properties_t &props, wcstring command_str); ~job_t(); + /// Autocxx needs to see this. + job_t(const job_t &) = delete; + /// Returns the command as a wchar_t *. */ const wchar_t *command_wcstr() const { return command_str.c_str(); } @@ -440,6 +449,9 @@ class job_t : noncopyable_t { /// A non-user-visible, never-recycled job ID. const internal_job_id_t internal_job_id; + /// Getter to enable ffi. + internal_job_id_t get_internal_job_id() const { return internal_job_id; } + /// Flags associated with the job. struct flags_t { /// Whether the specified job is completely constructed: every process in the job has been @@ -522,9 +534,21 @@ class job_t : noncopyable_t { /// \returns the statuses for this job. maybe_t get_statuses() const; + + /// \returns the list of processes. + const process_list_t &get_processes() const; + + /// autocxx junk. + RustFFIProcList ffi_processes() const; }; using job_ref_t = std::shared_ptr; +// Helper junk for autocxx. 
+struct RustFFIJobList { + job_ref_t *jobs; + size_t count; +}; + /// Whether this shell is attached to a tty. bool is_interactive_session(); void set_interactive_session(bool flag); @@ -540,7 +564,7 @@ bool no_exec(); void mark_no_exec(); // List of jobs. -using job_list_t = std::deque; +using job_list_t = std::vector; /// The current job control mode. /// diff --git a/src/reader.cpp b/src/reader.cpp index bb1e101ee..1d2a14bfc 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -54,6 +54,7 @@ #include "exec.h" #include "expand.h" #include "fallback.h" // IWYU pragma: keep +#include "fd_readable_set.rs.h" #include "fds.h" #include "flog.h" #include "function.h" @@ -75,7 +76,7 @@ #include "proc.h" #include "reader.h" #include "screen.h" -#include "signal.h" +#include "signals.h" #include "termsize.h" #include "tokenizer.h" #include "wcstringutil.h" @@ -431,12 +432,12 @@ class reader_history_search_t { assert(offset != wcstring::npos && "Should have found a match in the search result"); add_if_new({std::move(text), offset}); } else if (mode_ == token) { - tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED); + auto tok = new_tokenizer(text.c_str(), TOK_ACCEPT_UNFINISHED); std::vector local_tokens; - while (auto token = tok.next()) { - if (token->type != token_type_t::string) continue; - wcstring text = tok.text_of(*token); + while (auto token = tok->next()) { + if (token->type_ != token_type_t::string) continue; + wcstring text = *tok->text_of(*token); size_t offset = find(text, needle); if (offset != wcstring::npos) { local_tokens.push_back({std::move(text), offset}); @@ -864,7 +865,7 @@ class reader_data_t : public std::enable_shared_from_this { /// try expanding it as a wildcard, populating \p result with the expanded string. 
expand_result_t::result_t try_expand_wildcard(wcstring wc, size_t pos, wcstring *result); - void move_word(editable_line_t *el, bool move_right, bool erase, enum move_word_style_t style, + void move_word(editable_line_t *el, bool move_right, bool erase, move_word_style_t style, bool newv); void run_input_command_scripts(const wcstring_list_t &cmds); @@ -897,8 +898,9 @@ class reader_data_t : public std::enable_shared_from_this { bool can_autosuggest() const; void autosuggest_completed(autosuggestion_t result); void update_autosuggestion(); - void accept_autosuggestion(bool full, bool single = false, - move_word_style_t style = move_word_style_punctuation); + void accept_autosuggestion( + bool full, bool single = false, + move_word_style_t style = move_word_style_t::move_word_style_punctuation); void super_highlight_me_plenty(); /// Finish up any outstanding syntax highlighting, before execution. @@ -2114,11 +2116,11 @@ void reader_data_t::accept_autosuggestion(bool full, bool single, move_word_styl autosuggestion.text.substr(command_line.size(), 1)); } else { // Accept characters according to the specified style. - move_word_state_machine_t state(style); + auto state = new_move_word_state_machine(style); size_t want; for (want = command_line.size(); want < autosuggestion.text.size(); want++) { wchar_t wc = autosuggestion.text.at(want); - if (!state.consume_char(wc)) break; + if (!state->consume_char(wc)) break; } size_t have = command_line.size(); replace_substring(&command_line, command_line.size(), 0, @@ -2647,13 +2649,13 @@ enum move_word_dir_t { MOVE_DIR_LEFT, MOVE_DIR_RIGHT }; /// \param erase Whether to erase the characters along the way or only move past them. /// \param newv if the new kill item should be appended to the previous kill item or not. void reader_data_t::move_word(editable_line_t *el, bool move_right, bool erase, - enum move_word_style_t style, bool newv) { + move_word_style_t style, bool newv) { // Return if we are already at the edge. 
const size_t boundary = move_right ? el->size() : 0; if (el->position() == boundary) return; // When moving left, a value of 1 means the character at index 0. - move_word_state_machine_t state(style); + auto state = new_move_word_state_machine(style); const wchar_t *const command_line = el->text().c_str(); const size_t start_buff_pos = el->position(); @@ -2661,7 +2663,7 @@ void reader_data_t::move_word(editable_line_t *el, bool move_right, bool erase, while (buff_pos != boundary) { size_t idx = (move_right ? buff_pos : buff_pos - 1); wchar_t c = command_line[idx]; - if (!state.consume_char(c)) break; + if (!state->consume_char(c)) break; buff_pos = (move_right ? buff_pos + 1 : buff_pos - 1); } @@ -2709,7 +2711,7 @@ void reader_data_t::set_buffer_maintaining_pager(const wcstring &b, size_t pos, /// Run the specified command with the correct terminal modes, and while taking care to perform job /// notification, set the title, etc. static eval_res_t reader_run_command(parser_t &parser, const wcstring &cmd) { - wcstring ft = tok_command(cmd); + wcstring ft = *tok_command(cmd); // Provide values for `status current-command` and `status current-commandline` if (!ft.empty()) { @@ -2754,13 +2756,13 @@ static eval_res_t reader_run_command(parser_t &parser, const wcstring &cmd) { } static parser_test_error_bits_t reader_shell_test(const parser_t &parser, const wcstring &bstr) { - parse_error_list_t errors; + auto errors = new_parse_error_list(); parser_test_error_bits_t res = - parse_util_detect_errors(bstr, &errors, true /* do accept incomplete */); + parse_util_detect_errors(bstr, &*errors, true /* do accept incomplete */); if (res & PARSER_TEST_ERROR) { wcstring error_desc; - parser.get_backtrace(bstr, errors, error_desc); + parser.get_backtrace(bstr, *errors, error_desc); // Ensure we end with a newline. Also add an initial newline, because it's likely the user // just hit enter and so there's junk on the current line. 
@@ -3302,10 +3304,10 @@ static wchar_t unescaped_quote(const wcstring &str, size_t pos) { /// Returns true if the last token is a comment. static bool text_ends_in_comment(const wcstring &text) { - tokenizer_t tok(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS); + auto tok = new_tokenizer(text.c_str(), TOK_ACCEPT_UNFINISHED | TOK_SHOW_COMMENTS); bool is_comment = false; - while (auto token = tok.next()) { - is_comment = token->type == token_type_t::comment; + while (auto token = tok->next()) { + is_comment = token->type_ == token_type_t::comment; } return is_comment; } @@ -3362,7 +3364,7 @@ maybe_t reader_data_t::read_normal_chars(readline_loop_state_t &rl while (accumulated_chars.size() < limit) { bool allow_commands = (accumulated_chars.empty()); auto evt = inputter.read_char(allow_commands ? normal_handler : empty_handler); - if (!event_is_normal_char(evt) || !fd_readable_set_t::poll_fd_readable(conf.in)) { + if (!event_is_normal_char(evt) || !poll_fd_readable(conf.in)) { event_needing_handling = std::move(evt); break; } else if (evt.input_style == char_input_style_t::notfirst && accumulated_chars.empty() && @@ -3798,9 +3800,10 @@ void reader_data_t::handle_readline_command(readline_cmd_t c, readline_loop_stat case rl::backward_kill_path_component: case rl::backward_kill_bigword: { move_word_style_t style = - (c == rl::backward_kill_bigword ? move_word_style_whitespace - : c == rl::backward_kill_path_component ? move_word_style_path_components - : move_word_style_punctuation); + (c == rl::backward_kill_bigword ? move_word_style_t::move_word_style_whitespace + : c == rl::backward_kill_path_component + ? move_word_style_t::move_word_style_path_components + : move_word_style_t::move_word_style_punctuation); // Is this the same killring item as the last kill? 
bool newv = (rls.last_cmd != rl::backward_kill_word && rls.last_cmd != rl::backward_kill_path_component && @@ -3812,8 +3815,8 @@ void reader_data_t::handle_readline_command(readline_cmd_t c, readline_loop_stat case rl::kill_bigword: { // The "bigword" functions differ only in that they move to the next whitespace, not // punctuation. - auto move_style = - (c == rl::kill_word) ? move_word_style_punctuation : move_word_style_whitespace; + auto move_style = (c == rl::kill_word) ? move_word_style_t::move_word_style_punctuation + : move_word_style_t::move_word_style_whitespace; move_word(active_edit_line(), MOVE_DIR_RIGHT, true /* erase */, move_style, rls.last_cmd != c /* same kill item if same movement */); break; @@ -3830,8 +3833,9 @@ void reader_data_t::handle_readline_command(readline_cmd_t c, readline_loop_stat break; } - auto move_style = (c != rl::backward_bigword) ? move_word_style_punctuation - : move_word_style_whitespace; + auto move_style = (c != rl::backward_bigword) + ? move_word_style_t::move_word_style_punctuation + : move_word_style_t::move_word_style_whitespace; move_word(active_edit_line(), MOVE_DIR_LEFT, false /* do not erase */, move_style, false); break; @@ -3848,8 +3852,9 @@ void reader_data_t::handle_readline_command(readline_cmd_t c, readline_loop_stat break; } - auto move_style = (c != rl::forward_bigword) ? move_word_style_punctuation - : move_word_style_whitespace; + auto move_style = (c != rl::forward_bigword) + ? move_word_style_t::move_word_style_punctuation + : move_word_style_t::move_word_style_whitespace; editable_line_t *el = active_edit_line(); if (el->position() < el->size()) { move_word(el, MOVE_DIR_RIGHT, false /* do not erase */, move_style, false); @@ -4071,7 +4076,8 @@ void reader_data_t::handle_readline_command(readline_cmd_t c, readline_loop_stat // We apply the operation from the current location to the end of the word. 
size_t pos = el->position(); size_t init_pos = pos; - move_word(el, MOVE_DIR_RIGHT, false, move_word_style_punctuation, false); + move_word(el, MOVE_DIR_RIGHT, false, move_word_style_t::move_word_style_punctuation, + false); wcstring replacement; for (; pos < el->position(); pos++) { wchar_t chr = el->text().at(pos); @@ -4718,11 +4724,11 @@ static int read_ni(parser_t &parser, int fd, const io_chain_t &io) { } // Parse into an ast and detect errors. - parse_error_list_t errors; - auto ast = ast::ast_t::parse(str, parse_flag_none, &errors); + auto errors = new_parse_error_list(); + auto ast = ast::ast_t::parse(str, parse_flag_none, &*errors); bool errored = ast.errored(); if (!errored) { - errored = parse_util_detect_errors(ast, str, &errors); + errored = parse_util_detect_errors(ast, str, &*errors); } if (!errored) { // Construct a parsed source ref. @@ -4732,7 +4738,7 @@ static int read_ni(parser_t &parser, int fd, const io_chain_t &io) { return 0; } else { wcstring sb; - parser.get_backtrace(str, errors, sb); + parser.get_backtrace(str, *errors, sb); std::fwprintf(stderr, L"%ls", sb.c_str()); return 1; } diff --git a/src/redirection.cpp b/src/redirection.cpp deleted file mode 100644 index 1e884809d..000000000 --- a/src/redirection.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include "config.h" // IWYU pragma: keep - -#include "redirection.h" - -#include -#include - -#include - -#include "io.h" -#include "wutil.h" - -dup2_list_t::~dup2_list_t() = default; - -maybe_t redirection_spec_t::get_target_as_fd() const { - errno = 0; - int result = fish_wcstoi(target.c_str()); - if (errno || result < 0) return none(); - return result; -} - -int redirection_spec_t::oflags() const { - switch (mode) { - case redirection_mode_t::append: - return O_CREAT | O_APPEND | O_WRONLY; - case redirection_mode_t::overwrite: - return O_CREAT | O_WRONLY | O_TRUNC; - case redirection_mode_t::noclob: - return O_CREAT | O_EXCL | O_WRONLY; - case redirection_mode_t::input: - return O_RDONLY; - case 
redirection_mode_t::fd: - default: - DIE("Not a file redirection"); - } -} - -dup2_list_t dup2_list_t::resolve_chain(const io_chain_t &io_chain) { - ASSERT_IS_NOT_FORKED_CHILD(); - dup2_list_t result; - for (const auto &io : io_chain) { - if (io->source_fd < 0) { - result.add_close(io->fd); - } else { - result.add_dup2(io->source_fd, io->fd); - } - } - return result; -} - -int dup2_list_t::fd_for_target_fd(int target) const { - // Paranoia. - if (target < 0) { - return target; - } - // Note we can simply walk our action list backwards, looking for src -> target dups. - int cursor = target; - for (auto iter = actions_.rbegin(); iter != actions_.rend(); ++iter) { - if (iter->target == cursor) { - // cursor is replaced by iter->src - cursor = iter->src; - } else if (iter->src == cursor && iter->target < 0) { - // cursor is closed. - cursor = -1; - break; - } - } - return cursor; -} diff --git a/src/redirection.h b/src/redirection.h index c00082960..3e4c7d703 100644 --- a/src/redirection.h +++ b/src/redirection.h @@ -1,101 +1,32 @@ #ifndef FISH_REDIRECTION_H #define FISH_REDIRECTION_H -#include -#include -#include +#if INCLUDE_RUST_HEADERS -#include "common.h" -#include "maybe.h" +#include "redirection.rs.h" -/// This file supports specifying and applying redirections. +#else -enum class redirection_mode_t { - overwrite, // normal redirection: > file.txt - append, // appending redirection: >> file.txt - input, // input redirection: < file.txt - fd, // fd redirection: 2>&1 - noclob // noclobber redirection: >? file.txt -}; - -class io_chain_t; - -/// A struct which represents a redirection specification from the user. -/// Here the file descriptors don't represent open files - it's purely textual. -struct redirection_spec_t { - /// The redirected fd, or -1 on overflow. - /// In the common case of a pipe, this is 1 (STDOUT_FILENO). - /// For example, in the case of "3>&1" this will be 3. - int fd{-1}; - - /// The redirection mode. 
- redirection_mode_t mode{redirection_mode_t::overwrite}; - - /// The target of the redirection. - /// For example in "3>&1", this will be "1". - /// In "< file.txt" this will be "file.txt". - wcstring target{}; - - /// \return if this is a close-type redirection. - bool is_close() const { return mode == redirection_mode_t::fd && target == L"-"; } - - /// Attempt to parse target as an fd. Return the fd, or none() if none. - maybe_t get_target_as_fd() const; - - /// \return the open flags for this redirection. - int oflags() const; - - redirection_spec_t(int fd, redirection_mode_t mode, wcstring target) - : fd(fd), mode(mode), target(std::move(target)) {} -}; -using redirection_spec_list_t = std::vector; - -/// A class representing a sequence of basic redirections. -class dup2_list_t : noncopyable_t { - public: - /// A type that represents the action dup2(src, target). - /// If target is negative, this represents close(src). - /// Note none of the fds here are considered 'owned'. - struct action_t { - int src; - int target; - }; - - dup2_list_t() = default; - dup2_list_t(dup2_list_t &&) = default; - dup2_list_t &operator=(dup2_list_t &&) = default; - ~dup2_list_t(); - - /// \return the list of dup2 actions. - const std::vector &get_actions() const { return actions_; } - - /// Produce a dup_fd_list_t from an io_chain. This may not be called before fork(). - /// The result contains the list of fd actions (dup2 and close), as well as the list - /// of fds opened. - static dup2_list_t resolve_chain(const io_chain_t &); - - /// \return the fd ultimately dup'd to a target fd, or -1 if the target is closed. - /// For example, if target fd is 1, and we have a dup2 chain 5->3 and 3->1, then we will - /// return 5. If the target is not referenced in the chain, returns target. - int fd_for_target_fd(int target) const; - - private: - /// The list of actions. - std::vector actions_; - - /// Append a dup2 action. 
- void add_dup2(int src, int target) { - assert(src >= 0 && target >= 0 && "Invalid fd in add_dup2"); - // Note: record these even if src and target is the same. - // This is a note that we must clear the CLO_EXEC bit. - actions_.push_back(action_t{src, target}); - } - - /// Append a close action. - void add_close(int fd) { - assert(fd >= 0 && "Invalid fd in add_close"); - actions_.push_back(action_t{fd, -1}); - } +// Hacks to allow us to compile without Rust headers. + +enum class RedirectionMode { + overwrite, + append, + input, + fd, + noclob, }; +struct Dup2Action; +class Dup2List; +struct RedirectionSpec; +struct RedirectionSpecList; + +#endif + +using redirection_mode_t = RedirectionMode; +using redirection_spec_t = RedirectionSpec; +using redirection_spec_list_t = RedirectionSpecList; +using dup2_action_t = Dup2Action; +using dup2_list_t = Dup2List; #endif diff --git a/src/rustffi.cpp b/src/rustffi.cpp new file mode 100644 index 000000000..d5e4980a6 --- /dev/null +++ b/src/rustffi.cpp @@ -0,0 +1,21 @@ +#include + +#include "wutil.h" + +extern "C" { +void fishffi$unique_ptr$wcstring$null(std::unique_ptr *ptr) noexcept { + new (ptr) std::unique_ptr(); +} +void fishffi$unique_ptr$wcstring$raw(std::unique_ptr *ptr, wcstring *raw) noexcept { + new (ptr) std::unique_ptr(raw); +} +const wcstring *fishffi$unique_ptr$wcstring$get(const std::unique_ptr &ptr) noexcept { + return ptr.get(); +} +wcstring *fishffi$unique_ptr$wcstring$release(std::unique_ptr &ptr) noexcept { + return ptr.release(); +} +void fishffi$unique_ptr$wcstring$drop(std::unique_ptr *ptr) noexcept { + ptr->~unique_ptr(); +} +} // extern "C" diff --git a/src/signal.cpp b/src/signals.cpp similarity index 96% rename from src/signal.cpp rename to src/signals.cpp index 804fc53a9..5b91d5e8a 100644 --- a/src/signal.cpp +++ b/src/signals.cpp @@ -16,7 +16,7 @@ #include "fallback.h" // IWYU pragma: keep #include "global_safety.h" #include "reader.h" -#include "signal.h" +#include "signals.h" #include 
"termsize.h" #include "topic_monitor.h" #include "wutil.h" // IWYU pragma: keep @@ -243,7 +243,7 @@ static void fish_signal_handler(int sig, siginfo_t *info, void *context) { if (!observed) { reader_sighup(); } - topic_monitor_t::principal().post(topic_t::sighupint); + topic_monitor_principal().post(topic_t::sighupint); break; case SIGTERM: @@ -261,12 +261,12 @@ static void fish_signal_handler(int sig, siginfo_t *info, void *context) { s_cancellation_signal = SIGINT; } reader_handle_sigint(); - topic_monitor_t::principal().post(topic_t::sighupint); + topic_monitor_principal().post(topic_t::sighupint); break; case SIGCHLD: // A child process stopped or exited. - topic_monitor_t::principal().post(topic_t::sigchld); + topic_monitor_principal().post(topic_t::sigchld); break; case SIGALRM: @@ -429,7 +429,7 @@ sigchecker_t::sigchecker_t(topic_t signal) : topic_(signal) { } bool sigchecker_t::check() { - auto &tm = topic_monitor_t::principal(); + auto &tm = topic_monitor_principal(); generation_t gen = tm.generation_for_topic(topic_); bool changed = this->gen_ != gen; this->gen_ = gen; @@ -437,8 +437,8 @@ bool sigchecker_t::check() { } void sigchecker_t::wait() const { - auto &tm = topic_monitor_t::principal(); - generation_list_t gens = generation_list_t::invalids(); - gens.at(topic_) = this->gen_; + auto &tm = topic_monitor_principal(); + generation_list_t gens = invalid_generations(); + gens.at_mut(topic_) = this->gen_; tm.check(&gens, true /* wait */); } diff --git a/src/signal.h b/src/signals.h similarity index 100% rename from src/signal.h rename to src/signals.h diff --git a/src/timer.cpp b/src/timer.cpp deleted file mode 100644 index 4bf0311d8..000000000 --- a/src/timer.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// Functions for executing the time builtin. 
-#include "config.h" // IWYU pragma: keep - -#include "timer.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "common.h" -#include "fallback.h" // IWYU pragma: keep -#include "wutil.h" // IWYU pragma: keep - -// Measuring time is always complicated with many caveats. Quite apart from the typical -// gotchas faced by developers attempting to choose between monotonic vs non-monotonic and system vs -// cpu clocks, the fact that we are executing as a shell further complicates matters: we can't just -// observe the elapsed CPU time, because that does not reflect the total execution time for both -// ourselves (internal shell execution time and the time it takes for builtins and functions to -// execute) and any external processes we spawn. - -// It would be nice to use the C++1 type-safe interfaces to measure elapsed time, but that -// unfortunately is underspecified with regards to user/system time and only provides means of -// querying guaranteed monotonicity and resolution for the various clocks. It can be used to measure -// elapsed wall time nicely, but if we would like to provide information more useful for -// benchmarking and tuning then we must turn to either clock_gettime(2), with extensions for thread- -// and process-specific elapsed CPU time, or times(3) for a standard interface to overall process -// and child user/system time elapsed between snapshots. At least on some systems, times(3) has been -// deprecated in favor of getrusage(2), which offers a wider variety of metrics coalesced for SELF, -// THREAD, or CHILDREN. - -// With regards to the C++11 `` interface, there are three different time sources (clocks) -// that we can use portably: `system_clock`, `steady_clock`, and `high_resolution_clock`; with -// different properties and guarantees. 
While the obvious difference is the direct tradeoff between -// period and resolution (higher resolution equals ability to measure smaller time differences more -// accurately, but at the cost of rolling over more frequently), but unfortunately it is not as -// simple as starting two clocks and going with the highest resolution that hasn't rolled over. -// `system_clock` is out because it is always subject to interference due to adjustments from NTP -// servers or super users (as it reflects the "actual" time), but `high_resolution_clock` may or may -// not be aliased to `system_clock` or `steady_clock`. In practice, there's likely no need to worry -// about this too much, a survey of the different -// libraries indicates that `high_resolution_clock` is either an alias for `steady_clock` (in which -// case it offers no greater resolution) or it is an alias for `system_clock` (in which case, even -// when it offers a greater resolution than `steady_clock` it is not fit for use). - -static int64_t micros(struct timeval t) { - return (static_cast(t.tv_usec) + static_cast(t.tv_sec * 1E6)); -}; - -template -static int64_t micros(const std::chrono::duration &d) { - return std::chrono::duration_cast(d).count(); -}; - -timer_snapshot_t timer_snapshot_t::take() { - timer_snapshot_t snapshot; - - getrusage(RUSAGE_SELF, &snapshot.cpu_fish); - getrusage(RUSAGE_CHILDREN, &snapshot.cpu_children); - snapshot.wall = std::chrono::steady_clock::now(); - - return snapshot; -} - -wcstring timer_snapshot_t::print_delta(const timer_snapshot_t &t1, const timer_snapshot_t &t2, - bool verbose /* = true */) { - int64_t fish_sys_micros = micros(t2.cpu_fish.ru_stime) - micros(t1.cpu_fish.ru_stime); - int64_t fish_usr_micros = micros(t2.cpu_fish.ru_utime) - micros(t1.cpu_fish.ru_utime); - int64_t child_sys_micros = micros(t2.cpu_children.ru_stime) - micros(t1.cpu_children.ru_stime); - int64_t child_usr_micros = micros(t2.cpu_children.ru_utime) - micros(t1.cpu_children.ru_utime); - - // The 
result from getrusage is not necessarily realtime, it may be cached a few microseconds - // behind. In the event that execution completes extremely quickly or there is no data (say, we - // are measuring external execution time but no external processes have been launched), it can - // incorrectly appear to be negative. - fish_sys_micros = std::max(int64_t(0), fish_sys_micros); - fish_usr_micros = std::max(int64_t(0), fish_usr_micros); - child_sys_micros = std::max(int64_t(0), child_sys_micros); - child_usr_micros = std::max(int64_t(0), child_usr_micros); - - int64_t net_sys_micros = fish_sys_micros + child_sys_micros; - int64_t net_usr_micros = fish_usr_micros + child_usr_micros; - int64_t net_wall_micros = micros(t2.wall - t1.wall); - - enum class tunit { - minutes, - seconds, - milliseconds, - microseconds, - }; - - auto get_unit = [](int64_t micros) { - if (micros > 900 * 1E6) { - return tunit::minutes; - } else if (micros >= 999995) { // Move to seconds if we would overflow the %6.2 format. 
- return tunit::seconds; - } else if (micros >= 1000) { - return tunit::milliseconds; - } else { - return tunit::microseconds; - } - }; - - auto unit_name = [](tunit unit) { - switch (unit) { - case tunit::minutes: - return "minutes"; - case tunit::seconds: - return "seconds"; - case tunit::milliseconds: - return "milliseconds"; - case tunit::microseconds: - return "microseconds"; - } - // GCC does not recognize the exhaustive switch above - return ""; - }; - - auto unit_short_name = [](tunit unit) { - switch (unit) { - case tunit::minutes: - return "mins"; - case tunit::seconds: - return "secs"; - case tunit::milliseconds: - return "millis"; - case tunit::microseconds: - return "micros"; - } - // GCC does not recognize the exhaustive switch above - return ""; - }; - - auto convert = [](int64_t micros, tunit unit) { - switch (unit) { - case tunit::minutes: - return micros / 1.0E6 / 60.0; - case tunit::seconds: - return micros / 1.0E6; - case tunit::milliseconds: - return micros / 1.0E3; - case tunit::microseconds: - return micros / 1.0; - } - // GCC does not recognize the exhaustive switch above - return 0.0; - }; - - auto wall_unit = get_unit(net_wall_micros); - auto cpu_unit = get_unit(std::max(net_sys_micros, net_usr_micros)); - double wall_time = convert(net_wall_micros, wall_unit); - double usr_time = convert(net_usr_micros, cpu_unit); - double sys_time = convert(net_sys_micros, cpu_unit); - - wcstring output; - if (!verbose) { - append_format(output, - L"\n_______________________________" - L"\nExecuted in %6.2F %s" - L"\n usr time %6.2F %s" - L"\n sys time %6.2F %s" - L"\n", - wall_time, unit_name(wall_unit), usr_time, unit_name(cpu_unit), sys_time, - unit_name(cpu_unit)); - } else { - auto fish_unit = get_unit(std::max(fish_sys_micros, fish_usr_micros)); - auto child_unit = get_unit(std::max(child_sys_micros, child_usr_micros)); - double fish_usr_time = convert(fish_usr_micros, fish_unit); - double fish_sys_time = convert(fish_sys_micros, fish_unit); - 
double child_usr_time = convert(child_usr_micros, child_unit); - double child_sys_time = convert(child_sys_micros, child_unit); - - int column2_unit_len = - std::max(strlen(unit_short_name(wall_unit)), strlen(unit_short_name(cpu_unit))); - append_format(output, - L"\n________________________________________________________" - L"\nExecuted in %6.2F %-*s %-*s %s" - L"\n usr time %6.2F %-*s %6.2F %s %6.2F %s" - L"\n sys time %6.2F %-*s %6.2F %s %6.2F %s" - L"\n", - wall_time, column2_unit_len, unit_short_name(wall_unit), - static_cast(strlen(unit_short_name(fish_unit))) + 7, "fish", "external", - usr_time, column2_unit_len, unit_short_name(cpu_unit), fish_usr_time, - unit_short_name(fish_unit), child_usr_time, unit_short_name(child_unit), - sys_time, column2_unit_len, unit_short_name(cpu_unit), fish_sys_time, - unit_short_name(fish_unit), child_sys_time, unit_short_name(child_unit)); - } - return output; -}; - -static void timer_finished(const timer_snapshot_t &t1) { - auto t2 = timer_snapshot_t::take(); - - // Well, this is awkward. By defining `time` as a decorator and not a built-in, there's - // no associated stream for its output! - auto output = timer_snapshot_t::print_delta(t1, t2, true); - std::fwprintf(stderr, L"%S\n", output.c_str()); -} - -cleanup_t push_timer(bool enabled) { - if (!enabled) return {[] {}}; - - auto t1 = timer_snapshot_t::take(); - return {[=] { timer_finished(t1); }}; -} diff --git a/src/timer.h b/src/timer.h deleted file mode 100644 index ab41a2374..000000000 --- a/src/timer.h +++ /dev/null @@ -1,27 +0,0 @@ -// Prototypes for executing builtin_time function. 
-#ifndef FISH_TIMER_H -#define FISH_TIMER_H - -#include - -#include - -#include "common.h" - -cleanup_t push_timer(bool enabled); - -struct timer_snapshot_t { - public: - struct rusage cpu_fish; - struct rusage cpu_children; - std::chrono::time_point wall; - - static timer_snapshot_t take(); - static wcstring print_delta(const timer_snapshot_t &t1, const timer_snapshot_t &t2, - bool verbose = false); - - private: - timer_snapshot_t() {} -}; - -#endif diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp deleted file mode 100644 index 42d0264fc..000000000 --- a/src/tokenizer.cpp +++ /dev/null @@ -1,907 +0,0 @@ -// A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be -// extended to support marks, tokenizing multiple strings and disposing of unused string segments. -#include "config.h" // IWYU pragma: keep - -#include "tokenizer.h" - -#include -#include -#include -#include - -#include -#include -#include - -#include "common.h" -#include "fallback.h" // IWYU pragma: keep -#include "future_feature_flags.h" -#include "wutil.h" // IWYU pragma: keep - -// _(s) is already wgettext(s).c_str(), so let's not convert back to wcstring -const wchar_t *tokenizer_get_error_message(tokenizer_error_t err) { - switch (err) { - case tokenizer_error_t::none: - return L""; - case tokenizer_error_t::unterminated_quote: - return _(L"Unexpected end of string, quotes are not balanced"); - case tokenizer_error_t::unterminated_subshell: - return _(L"Unexpected end of string, expecting ')'"); - case tokenizer_error_t::unterminated_slice: - return _(L"Unexpected end of string, square brackets do not match"); - case tokenizer_error_t::unterminated_escape: - return _(L"Unexpected end of string, incomplete escape sequence"); - case tokenizer_error_t::invalid_redirect: - return _(L"Invalid input/output redirection"); - case tokenizer_error_t::invalid_pipe: - return _(L"Cannot use stdin (fd 0) as pipe output"); - case 
tokenizer_error_t::invalid_pipe_ampersand: - return _(L"|& is not valid. In fish, use &| to pipe both stdout and stderr."); - case tokenizer_error_t::closing_unopened_subshell: - return _(L"Unexpected ')' for unopened parenthesis"); - case tokenizer_error_t::illegal_slice: - return _(L"Unexpected '[' at this location"); - case tokenizer_error_t::closing_unopened_brace: - return _(L"Unexpected '}' for unopened brace expansion"); - case tokenizer_error_t::unterminated_brace: - return _(L"Unexpected end of string, incomplete parameter expansion"); - case tokenizer_error_t::expected_pclose_found_bclose: - return _(L"Unexpected '}' found, expecting ')'"); - case tokenizer_error_t::expected_bclose_found_pclose: - return _(L"Unexpected ')' found, expecting '}'"); - } - assert(0 && "Unexpected tokenizer error"); - return nullptr; -} - -/// Return an error token and mark that we no longer have a next token. -tok_t tokenizer_t::call_error(tokenizer_error_t error_type, const wchar_t *token_start, - const wchar_t *error_loc, maybe_t token_length, - size_t error_len) { - assert(error_type != tokenizer_error_t::none && "tokenizer_error_t::none passed to call_error"); - assert(error_loc >= token_start && "Invalid error location"); - assert(this->token_cursor >= token_start && "Invalid buff location"); - - // If continue_after_error is set and we have a real token length, then skip past it. - // Otherwise give up. - if (token_length.has_value() && continue_after_error) { - assert(this->token_cursor < error_loc + *token_length && "Unable to continue past error"); - this->token_cursor = error_loc + *token_length; - } else { - this->has_next = false; - } - - tok_t result{token_type_t::error}; - result.error = error_type; - result.offset = token_start - this->start; - // If we are passed a token_length, then use it; otherwise infer it from the buffer. - result.length = token_length.has_value() ? 
*token_length : this->token_cursor - token_start; - result.error_offset_within_token = error_loc - token_start; - result.error_length = error_len; - return result; -} - -tokenizer_t::tokenizer_t(const wchar_t *start, tok_flags_t flags) - : token_cursor(start), start(start) { - assert(start != nullptr && "Invalid start"); - - this->accept_unfinished = static_cast(flags & TOK_ACCEPT_UNFINISHED); - this->show_comments = static_cast(flags & TOK_SHOW_COMMENTS); - this->show_blank_lines = static_cast(flags & TOK_SHOW_BLANK_LINES); - this->continue_after_error = static_cast(flags & TOK_CONTINUE_AFTER_ERROR); -} - -tok_t::tok_t(token_type_t type) : type(type) {} - -/// Tests if this character can be a part of a string. Hash (#) starts a comment if it's the first -/// character in a token; otherwise it is considered a string character. See issue #953. -static bool tok_is_string_character(wchar_t c, maybe_t next) { - switch (c) { - case L'\0': - case L' ': - case L'\n': - case L'|': - case L'\t': - case L';': - case L'\r': - case L'<': - case L'>': { - // Unconditional separators. - return false; - } - case L'&': { - if (!feature_test(features_t::ampersand_nobg_in_token)) return false; - bool next_is_string = next.has_value() && tok_is_string_character(*next, none()); - // Unlike in other shells, '&' is not special if followed by a string character. - return next_is_string; - } - default: { - return true; - } - } -} - -/// Quick test to catch the most common 'non-magical' characters, makes read_string slightly faster -/// by adding a fast path for the most common characters. This is obviously not a suitable -/// replacement for iswalpha. 
-static inline int myal(wchar_t c) { return (c >= L'a' && c <= L'z') || (c >= L'A' && c <= L'Z'); } - -namespace tok_modes { -enum { - regular_text = 0, // regular text - subshell = 1 << 0, // inside of subshell parentheses - array_brackets = 1 << 1, // inside of array brackets - curly_braces = 1 << 2, - char_escape = 1 << 3, -}; -} // namespace tok_modes -using tok_mode_t = uint32_t; - -/// Read the next token as a string. -tok_t tokenizer_t::read_string() { - tok_mode_t mode{tok_modes::regular_text}; - std::vector paran_offsets; - std::vector brace_offsets; - std::vector expecting; - std::vector quoted_cmdsubs; - int slice_offset = 0; - const wchar_t *const buff_start = this->token_cursor; - bool is_token_begin = true; - - auto process_opening_quote = [&](wchar_t quote) -> const wchar_t * { - const wchar_t *end = quote_end(this->token_cursor, quote); - if (end) { - if (*end == L'$') quoted_cmdsubs.push_back(paran_offsets.size()); - this->token_cursor = end; - return nullptr; - } else { - const wchar_t *error_loc = this->token_cursor; - this->token_cursor += std::wcslen(this->token_cursor); - return error_loc; - } - }; - - while (true) { - wchar_t c = *this->token_cursor; -#if false - wcstring msg = L"Handling 0x%x (%lc)"; - tok_mode mode_begin = mode; -#endif - - if (c == L'\0') { - break; - } - - // Make sure this character isn't being escaped before anything else - if ((mode & tok_modes::char_escape) == tok_modes::char_escape) { - mode &= ~(tok_modes::char_escape); - // and do nothing more - } else if (myal(c)) { - // Early exit optimization in case the character is just a letter, - // which has no special meaning to the tokenizer, i.e. the same mode continues. - } - - // Now proceed with the evaluation of the token, first checking to see if the token - // has been explicitly ignored (escaped). 
- else if (c == L'\\') { - mode |= tok_modes::char_escape; - } else if (c == L'#' && is_token_begin) { - this->token_cursor = comment_end(this->token_cursor) - 1; - } else if (c == L'(') { - paran_offsets.push_back(this->token_cursor - this->start); - expecting.push_back(L')'); - mode |= tok_modes::subshell; - } else if (c == L'{') { - brace_offsets.push_back(this->token_cursor - this->start); - expecting.push_back(L'}'); - mode |= tok_modes::curly_braces; - } else if (c == L')') { - if (!expecting.empty() && expecting.back() == L'}') { - return this->call_error(tokenizer_error_t::expected_bclose_found_pclose, - this->token_cursor, this->token_cursor, 1, 1); - } - if (paran_offsets.empty()) { - return this->call_error(tokenizer_error_t::closing_unopened_subshell, - this->token_cursor, this->token_cursor, 1, 1); - } - paran_offsets.pop_back(); - if (paran_offsets.empty()) { - mode &= ~(tok_modes::subshell); - } - expecting.pop_back(); - // Check if the ) completed a quoted command substitution. - if (!quoted_cmdsubs.empty() && quoted_cmdsubs.back() == paran_offsets.size()) { - quoted_cmdsubs.pop_back(); - // The "$(" part of a quoted command substitution closes double quotes. To keep - // quotes balanced, act as if there was an invisible double quote after the ")". 
- if (const wchar_t *error_loc = process_opening_quote(L'"')) { - if (!this->accept_unfinished) { - return this->call_error(tokenizer_error_t::unterminated_quote, buff_start, - error_loc); - } - break; - } - } - } else if (c == L'}') { - if (!expecting.empty() && expecting.back() == L')') { - return this->call_error(tokenizer_error_t::expected_pclose_found_bclose, - this->token_cursor, this->token_cursor, 1, 1); - } - if (brace_offsets.empty()) { - return this->call_error(tokenizer_error_t::closing_unopened_brace, - this->token_cursor, - this->token_cursor + wcslen(this->token_cursor)); - } - brace_offsets.pop_back(); - if (brace_offsets.empty()) { - mode &= ~(tok_modes::curly_braces); - } - expecting.pop_back(); - } else if (c == L'[') { - if (this->token_cursor != buff_start) { - mode |= tok_modes::array_brackets; - slice_offset = this->token_cursor - this->start; - } else { - // This is actually allowed so the test operator `[` can be used as the head of a - // command - } - } - // Only exit bracket mode if we are in bracket mode. - // Reason: `]` can be a parameter, e.g. last parameter to `[` test alias. - // e.g. 
echo $argv[([ $x -eq $y ])] # must not end bracket mode on first bracket - else if (c == L']' && ((mode & tok_modes::array_brackets) == tok_modes::array_brackets)) { - mode &= ~(tok_modes::array_brackets); - } else if (c == L'\'' || c == L'"') { - if (const wchar_t *error_loc = process_opening_quote(c)) { - if (!this->accept_unfinished) { - return this->call_error(tokenizer_error_t::unterminated_quote, buff_start, - error_loc, none(), 1); - } - break; - } - } else if (mode == tok_modes::regular_text && - !tok_is_string_character(c, this->token_cursor[1])) { - break; - } - -#if false - if (mode != mode_begin) { - msg.append(L": mode 0x%x -> 0x%x\n"); - } else { - msg.push_back(L'\n'); - } - FLOGF(error, msg.c_str(), c, c, int(mode_begin), int(mode)); -#endif - - is_token_begin = is_token_delimiter(this->token_cursor[0], this->token_cursor[1]); - this->token_cursor++; - } - - if (!this->accept_unfinished && (mode != tok_modes::regular_text)) { - // These are all "unterminated", so the only char we can mark as an error - // is the opener (the closing char could be anywhere!) 
- // - // (except for char_escape, which is one long by definition) - if (mode & tok_modes::char_escape) { - return this->call_error(tokenizer_error_t::unterminated_escape, buff_start, - this->token_cursor - 1, none(), 1); - } else if (mode & tok_modes::array_brackets) { - return this->call_error(tokenizer_error_t::unterminated_slice, buff_start, - this->start + slice_offset, none(), 1); - } else if (mode & tok_modes::subshell) { - assert(!paran_offsets.empty()); - size_t offset_of_open_paran = paran_offsets.back(); - - return this->call_error(tokenizer_error_t::unterminated_subshell, buff_start, - this->start + offset_of_open_paran, none(), 1); - } else if (mode & tok_modes::curly_braces) { - assert(!brace_offsets.empty()); - size_t offset_of_open_brace = brace_offsets.back(); - - return this->call_error(tokenizer_error_t::unterminated_brace, buff_start, - this->start + offset_of_open_brace, none(), 1); - } else { - DIE("Unknown non-regular-text mode"); - } - } - - tok_t result(token_type_t::string); - result.offset = buff_start - this->start; - result.length = this->token_cursor - buff_start; - return result; -} - -// Parse an fd from the non-empty string [start, end), all of which are digits. -// Return the fd, or -1 on overflow. -static int parse_fd(const wchar_t *start, const wchar_t *end) { - assert(start < end && "String cannot be empty"); - long long big_fd = 0; - for (const wchar_t *cursor = start; cursor < end; ++cursor) { - assert(L'0' <= *cursor && *cursor <= L'9' && "Not a digit"); - big_fd = big_fd * 10 + (*cursor - L'0'); - if (big_fd > INT_MAX) return -1; - } - assert(big_fd <= INT_MAX && "big_fd should be in range"); - return static_cast(big_fd); -} - -pipe_or_redir_t::pipe_or_redir_t() = default; - -maybe_t pipe_or_redir_t::from_string(const wchar_t *buff) { - pipe_or_redir_t result{}; - - /* Examples of supported syntaxes. - Note we are only responsible for parsing the redirection part, not 'cmd' or 'file'. 
- - cmd | cmd normal pipe - cmd &| cmd normal pipe plus stderr-merge - cmd >| cmd pipe with explicit fd - cmd 2>| cmd pipe with explicit fd - cmd < file stdin redirection - cmd > file redirection - cmd >> file appending redirection - cmd >? file noclobber redirection - cmd >>? file appending noclobber redirection - cmd 2> file file redirection with explicit fd - cmd >&2 fd redirection with no explicit src fd (stdout is used) - cmd 1>&2 fd redirection with an explicit src fd - cmd <&2 fd redirection with no explicit src fd (stdin is used) - cmd 3<&0 fd redirection with an explicit src fd - cmd &> file redirection with stderr merge - cmd ^ file caret (stderr) redirection, perhaps disabled via feature flags - cmd ^^ file caret (stderr) redirection, perhaps disabled via feature flags - */ - - const wchar_t *cursor = buff; - - // Extract a range of leading fd. - const wchar_t *fd_start = cursor; - while (iswdigit(*cursor)) cursor++; - const wchar_t *fd_end = cursor; - bool has_fd = (fd_end > fd_start); - - // Try consuming a given character. - // Return true if consumed. On success, advances cursor. - auto try_consume = [&cursor](wchar_t c) -> bool { - if (*cursor != c) return false; - cursor++; - return true; - }; - - // Like try_consume, but asserts on failure. - auto consume = [&](wchar_t c) { - assert(*cursor == c && "Failed to consume char"); - cursor++; - }; - - switch (*cursor) { - case L'|': { - if (has_fd) { - // Like 123| - return none(); - } - consume(L'|'); - assert(*cursor != L'|' && - "|| passed as redirection, this should have been handled as 'or' by the caller"); - result.fd = STDOUT_FILENO; - result.is_pipe = true; - break; - } - case L'>': { - consume(L'>'); - if (try_consume(L'>')) result.mode = redirection_mode_t::append; - if (try_consume(L'|')) { - // Note we differ from bash here. - // Consider `echo foo 2>| bar` - // In fish, this is a *pipe*. Run bar as a command and attach foo's stderr to bar's - // stdin, while leaving stdout as tty. 
- // In bash, this is a *redirection* to bar as a file. It is like > but ignores - // noclobber. - result.is_pipe = true; - result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 2>| - : STDOUT_FILENO; // like >| - } else if (try_consume(L'&')) { - // This is a redirection to an fd. - // Note that we allow ">>&", but it's still just writing to the fd - "appending" to - // it doesn't make sense. - result.mode = redirection_mode_t::fd; - result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1>&2 - : STDOUT_FILENO; // like >&2 - } else { - // This is a redirection to a file. - result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1> file.txt - : STDOUT_FILENO; // like > file.txt - if (result.mode != redirection_mode_t::append) - result.mode = redirection_mode_t::overwrite; - // Note 'echo abc >>? file' is valid: it means append and noclobber. - // But here "noclobber" means the file must not exist, so appending - // can be ignored. - if (try_consume(L'?')) result.mode = redirection_mode_t::noclob; - } - break; - } - case L'<': { - consume(L'<'); - if (try_consume('&')) { - result.mode = redirection_mode_t::fd; - } else { - result.mode = redirection_mode_t::input; - } - result.fd = has_fd ? parse_fd(fd_start, fd_end) // like 1<&3 or 1< /tmp/file.txt - : STDIN_FILENO; // like <&3 or < /tmp/file.txt - break; - } - case L'&': { - consume(L'&'); - if (try_consume(L'|')) { - // &| is pipe with stderr merge. - result.fd = STDOUT_FILENO; - result.is_pipe = true; - result.stderr_merge = true; - } else if (try_consume(L'>')) { - result.fd = STDOUT_FILENO; - result.stderr_merge = true; - result.mode = redirection_mode_t::overwrite; - if (try_consume(L'>')) result.mode = redirection_mode_t::append; // like &>> - if (try_consume(L'?')) - result.mode = redirection_mode_t::noclob; // like &>? or &>>? - } else { - return none(); - } - break; - } - default: { - // Not a redirection. 
- return none(); - } - } - - result.consumed = (cursor - buff); - assert(result.consumed > 0 && "Should have consumed at least one character on success"); - return result; -} - -int pipe_or_redir_t::oflags() const { - switch (mode) { - case redirection_mode_t::append: { - return O_CREAT | O_APPEND | O_WRONLY; - } - case redirection_mode_t::overwrite: { - return O_CREAT | O_WRONLY | O_TRUNC; - } - case redirection_mode_t::noclob: { - return O_CREAT | O_EXCL | O_WRONLY; - } - case redirection_mode_t::input: { - return O_RDONLY; - } - case redirection_mode_t::fd: - default: { - return -1; - } - } -} - -/// Test if a character is whitespace. Differs from iswspace in that it does not consider a -/// newline to be whitespace. -static bool iswspace_not_nl(wchar_t c) { - switch (c) { - case L' ': - case L'\t': - case L'\r': - return true; - case L'\n': - return false; - default: - return iswspace(c); - } -} - -maybe_t tokenizer_t::next() { - if (!this->has_next) { - return none(); - } - - // Consume non-newline whitespace. If we get an escaped newline, mark it and continue past - // it. - for (;;) { - if (this->token_cursor[0] == L'\\' && this->token_cursor[1] == L'\n') { - this->token_cursor += 2; - this->continue_line_after_comment = true; - } else if (iswspace_not_nl(this->token_cursor[0])) { - this->token_cursor++; - } else { - break; - } - } - - while (*this->token_cursor == L'#') { - // We have a comment, walk over the comment. - const wchar_t *comment_start = this->token_cursor; - this->token_cursor = comment_end(this->token_cursor); - size_t comment_len = this->token_cursor - comment_start; - - // If we are going to continue after the comment, skip any trailing newline. - if (this->token_cursor[0] == L'\n' && this->continue_line_after_comment) - this->token_cursor++; - - // Maybe return the comment. 
- if (this->show_comments) { - tok_t result(token_type_t::comment); - result.offset = comment_start - this->start; - result.length = comment_len; - return result; - } - while (iswspace_not_nl(this->token_cursor[0])) this->token_cursor++; - } - - // We made it past the comments and ate any trailing newlines we wanted to ignore. - this->continue_line_after_comment = false; - const size_t start_pos = this->token_cursor - this->start; - - maybe_t result{}; - switch (*this->token_cursor) { - case L'\0': { - this->has_next = false; - return none(); - } - case L'\r': // carriage-return - case L'\n': // newline - case L';': { - result.emplace(token_type_t::end); - result->offset = start_pos; - result->length = 1; - this->token_cursor++; - // Hack: when we get a newline, swallow as many as we can. This compresses multiple - // subsequent newlines into a single one. - if (!this->show_blank_lines) { - while (*this->token_cursor == L'\n' || *this->token_cursor == 13 /* CR */ || - *this->token_cursor == ' ' || *this->token_cursor == '\t') { - this->token_cursor++; - } - } - break; - } - case L'&': { - if (this->token_cursor[1] == L'&') { - // && is and. - result.emplace(token_type_t::andand); - result->offset = start_pos; - result->length = 2; - this->token_cursor += 2; - } else if (this->token_cursor[1] == L'>' || this->token_cursor[1] == L'|') { - // &> and &| redirect both stdout and stderr. - auto redir = pipe_or_redir_t::from_string(this->token_cursor); - assert(redir.has_value() && - "Should always succeed to parse a &> or &| redirection"); - result.emplace(redir->token_type()); - result->offset = start_pos; - result->length = redir->consumed; - this->token_cursor += redir->consumed; - } else { - result.emplace(token_type_t::background); - result->offset = start_pos; - result->length = 1; - this->token_cursor++; - } - break; - } - case L'|': { - if (this->token_cursor[1] == L'|') { - // || is or. 
- result.emplace(token_type_t::oror); - result->offset = start_pos; - result->length = 2; - this->token_cursor += 2; - } else if (this->token_cursor[1] == L'&') { - // |& is a bashism; in fish it's &|. - return this->call_error(tokenizer_error_t::invalid_pipe_ampersand, - this->token_cursor, this->token_cursor, 2, 2); - } else { - auto pipe = pipe_or_redir_t::from_string(this->token_cursor); - assert(pipe.has_value() && pipe->is_pipe && - "Should always succeed to parse a | pipe"); - result.emplace(pipe->token_type()); - result->offset = start_pos; - result->length = pipe->consumed; - this->token_cursor += pipe->consumed; - } - break; - } - case L'>': - case L'<': { - // There's some duplication with the code in the default case below. The key - // difference here is that we must never parse these as a string; a failed - // redirection is an error! - auto redir_or_pipe = pipe_or_redir_t::from_string(this->token_cursor); - if (!redir_or_pipe || redir_or_pipe->fd < 0) { - return this->call_error(tokenizer_error_t::invalid_redirect, this->token_cursor, - this->token_cursor, - redir_or_pipe ? redir_or_pipe->consumed : 0, - redir_or_pipe ? redir_or_pipe->consumed : 0); - } - result.emplace(redir_or_pipe->token_type()); - result->offset = start_pos; - result->length = redir_or_pipe->consumed; - this->token_cursor += redir_or_pipe->consumed; - break; - } - default: { - // Maybe a redirection like '2>&1', maybe a pipe like 2>|, maybe just a string. - const wchar_t *error_location = this->token_cursor; - maybe_t redir_or_pipe{}; - if (iswdigit(*this->token_cursor)) { - redir_or_pipe = pipe_or_redir_t::from_string(this->token_cursor); - } - - if (redir_or_pipe) { - // It looks like a redirection or a pipe. But we don't support piping fd 0. Note - // that fd 0 may be -1, indicating overflow; but we don't treat that as a - // tokenizer error. 
- if (redir_or_pipe->is_pipe && redir_or_pipe->fd == 0) { - return this->call_error(tokenizer_error_t::invalid_pipe, error_location, - error_location, redir_or_pipe->consumed, - redir_or_pipe->consumed); - } - result.emplace(redir_or_pipe->token_type()); - result->offset = start_pos; - result->length = redir_or_pipe->consumed; - this->token_cursor += redir_or_pipe->consumed; - } else { - // Not a redirection or pipe, so just a string. - result = this->read_string(); - } - break; - } - } - assert(result.has_value() && "Should have a token"); - return result; -} - -bool is_token_delimiter(wchar_t c, maybe_t next) { - return c == L'(' || !tok_is_string_character(c, std::move(next)); -} - -wcstring tok_command(const wcstring &str) { - tokenizer_t t(str.c_str(), 0); - while (auto token = t.next()) { - if (token->type != token_type_t::string) { - return {}; - } - wcstring text = t.text_of(*token); - if (variable_assignment_equals_pos(text).has_value()) { - continue; - } - return text; - } - return {}; -} - -bool move_word_state_machine_t::consume_char_punctuation(wchar_t c) { - enum { s_always_one = 0, s_rest, s_whitespace_rest, s_whitespace, s_alphanumeric, s_end }; - - bool consumed = false; - while (state != s_end && !consumed) { - switch (state) { - case s_always_one: { - // Always consume the first character. - consumed = true; - if (iswspace(c)) { - state = s_whitespace; - } else if (iswalnum(c)) { - state = s_alphanumeric; - } else { - // Don't allow switching type (ws->nonws) after non-whitespace and - // non-alphanumeric. - state = s_rest; - } - break; - } - case s_rest: { - if (iswspace(c)) { - // Consume only trailing whitespace. - state = s_whitespace_rest; - } else if (iswalnum(c)) { - // Consume only alnums. - state = s_alphanumeric; - } else { - consumed = false; - state = s_end; - } - break; - } - case s_whitespace_rest: - case s_whitespace: { - // "whitespace" consumes whitespace and switches to alnums, - // "whitespace_rest" only consumes whitespace. 
- if (iswspace(c)) { - // Consumed whitespace. - consumed = true; - } else { - state = state == s_whitespace ? s_alphanumeric : s_end; - } - break; - } - case s_alphanumeric: { - if (iswalnum(c)) { - consumed = true; // consumed alphanumeric - } else { - state = s_end; - } - break; - } - case s_end: - default: { - break; - } - } - } - return consumed; -} - -bool move_word_state_machine_t::is_path_component_character(wchar_t c) { - return tok_is_string_character(c, none()) && !std::wcschr(L"/={,}'\":@", c); -} - -bool move_word_state_machine_t::consume_char_path_components(wchar_t c) { - enum { - s_initial_punctuation, - s_whitespace, - s_separator, - s_slash, - s_path_component_characters, - s_initial_separator, - s_end - }; - - bool consumed = false; - while (state != s_end && !consumed) { - switch (state) { - case s_initial_punctuation: { - if (!is_path_component_character(c) && !iswspace(c)) { - state = s_initial_separator; - } else { - if (!is_path_component_character(c)) { - consumed = true; - } - state = s_whitespace; - } - break; - } - case s_whitespace: { - if (iswspace(c)) { - consumed = true; // consumed whitespace - } else if (c == L'/' || is_path_component_character(c)) { - state = s_slash; // path component - } else { - state = s_separator; // path separator - } - break; - } - case s_separator: { - if (!iswspace(c) && !is_path_component_character(c)) { - consumed = true; // consumed separator - } else { - state = s_end; - } - break; - } - case s_slash: { - if (c == L'/') { - consumed = true; // consumed slash - } else { - state = s_path_component_characters; - } - break; - } - case s_path_component_characters: { - if (is_path_component_character(c)) { - consumed = true; // consumed string character except slash - } else { - state = s_end; - } - break; - } - case s_initial_separator: { - if (is_path_component_character(c)) { - consumed = true; - state = s_path_component_characters; - } else if (iswspace(c)) { - state = s_end; - } else { - consumed = 
true; - } - break; - } - case s_end: - default: { - break; - } - } - } - return consumed; -} - -bool move_word_state_machine_t::consume_char_whitespace(wchar_t c) { - // Consume a "word" of printable characters plus any leading whitespace. - enum { s_always_one = 0, s_blank, s_graph, s_end }; - - bool consumed = false; - while (state != s_end && !consumed) { - switch (state) { - case s_always_one: { - consumed = true; // always consume the first character - // If it's not whitespace, only consume those from here. - if (!iswspace(c)) { - state = s_graph; - } else { - // If it's whitespace, keep consuming whitespace until the graphs. - state = s_blank; - } - break; - } - case s_blank: { - if (iswspace(c)) { - consumed = true; // consumed whitespace - } else { - state = s_graph; - } - break; - } - case s_graph: { - if (!iswspace(c)) { - consumed = true; // consumed printable non-space - } else { - state = s_end; - } - break; - } - case s_end: - default: { - break; - } - } - } - return consumed; -} - -bool move_word_state_machine_t::consume_char(wchar_t c) { - switch (style) { - case move_word_style_punctuation: { - return consume_char_punctuation(c); - } - case move_word_style_path_components: { - return consume_char_path_components(c); - } - case move_word_style_whitespace: { - return consume_char_whitespace(c); - } - } - - DIE("should not reach this statement"); // silence some compiler errors about not returning -} - -move_word_state_machine_t::move_word_state_machine_t(move_word_style_t syl) - : state(0), style(syl) {} - -void move_word_state_machine_t::reset() { state = 0; } - -// Return the location of the equals sign, or none if the string does -// not look like a variable assignment like FOO=bar. The detection -// works similar as in some POSIX shells: only letters and numbers qre -// allowed on the left hand side, no quotes or escaping. 
-maybe_t variable_assignment_equals_pos(const wcstring &txt) { - enum { init, has_some_variable_identifier } state = init; - // TODO bracket indexing - for (size_t i = 0; i < txt.size(); i++) { - wchar_t c = txt[i]; - if (state == init) { - if (!valid_var_name_char(c)) return {}; - state = has_some_variable_identifier; - } else { - if (c == '=') return {i}; - if (!valid_var_name_char(c)) return {}; - } - } - return {}; -} diff --git a/src/tokenizer.h b/src/tokenizer.h index 3fd12466f..5ad2ff3de 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -1,5 +1,3 @@ -// A specialized tokenizer for tokenizing the fish language. In the future, the tokenizer should be -// extended to support marks, tokenizing multiple strings and disposing of unused string segments. #ifndef FISH_TOKENIZER_H #define FISH_TOKENIZER_H @@ -11,35 +9,27 @@ #include "parse_constants.h" #include "redirection.h" -/// Token types. XXX Why this isn't parse_token_type_t, I'm not really sure. -enum class token_type_t : uint8_t { - error, /// Error reading token - string, /// String token - pipe, /// Pipe token - andand, /// && token - oror, /// || token - end, /// End token (semicolon or newline, not literal end) - redirect, /// redirection token - background, /// send job to bg token - comment, /// comment token -}; - -/// Flag telling the tokenizer to accept incomplete parameters, i.e. parameters with mismatching -/// parenthesis, etc. This is useful for tab-completion. -#define TOK_ACCEPT_UNFINISHED 1 - -/// Flag telling the tokenizer not to remove comments. Useful for syntax highlighting. -#define TOK_SHOW_COMMENTS 2 - -/// Ordinarily, the tokenizer ignores newlines following a newline, or a semicolon. This flag tells -/// the tokenizer to return each of them as a separate END. -#define TOK_SHOW_BLANK_LINES 4 - -/// Make an effort to continue after an error. 
-#define TOK_CONTINUE_AFTER_ERROR 8 - using tok_flags_t = unsigned int; +#define TOK_ACCEPT_UNFINISHED 1 +#define TOK_SHOW_COMMENTS 2 +#define TOK_SHOW_BLANK_LINES 4 +#define TOK_CONTINUE_AFTER_ERROR 8 + +#if INCLUDE_RUST_HEADERS + +#include "tokenizer.rs.h" +using token_type_t = TokenType; +using tokenizer_error_t = TokenizerError; +using tok_t = Tok; +using tokenizer_t = Tokenizer; +using pipe_or_redir_t = PipeOrRedir; +using move_word_state_machine_t = MoveWordStateMachine; +using move_word_style_t = MoveWordStyle; + +#else + +// Hacks to allow us to compile without Rust headers. enum class tokenizer_error_t : uint8_t { none, unterminated_quote, @@ -57,158 +47,6 @@ enum class tokenizer_error_t : uint8_t { expected_bclose_found_pclose, }; -/// Get the error message for an error \p err. -const wchar_t *tokenizer_get_error_message(tokenizer_error_t err); - -struct tok_t { - // Offset of the token. - source_offset_t offset{0}; - // Length of the token. - source_offset_t length{0}; - - // If an error, this is the offset of the error within the token. A value of 0 means it occurred - // at 'offset'. - source_offset_t error_offset_within_token{SOURCE_OFFSET_INVALID}; - source_offset_t error_length{0}; - - // If an error, this is the error code. - tokenizer_error_t error{tokenizer_error_t::none}; - - // The type of the token. - token_type_t type; - - // Construct from a token type. - explicit tok_t(token_type_t type); - - /// Returns whether the given location is within the source range or at its end. - bool location_in_or_at_end_of_source_range(size_t loc) const { - return offset <= loc && loc - offset <= length; - } - /// Gets source for the token, or the empty string if it has no source. - wcstring get_source(const wcstring &str) const { return wcstring(str, offset, length); } -}; -static_assert(sizeof(tok_t) <= 32, "tok_t expected to be 32 bytes or less"); - -/// The tokenizer struct. 
-class tokenizer_t : noncopyable_t { - /// A pointer into the original string, showing where the next token begins. - const wchar_t *token_cursor; - /// The start of the original string. - const wchar_t *const start; - /// Whether we have additional tokens. - bool has_next{true}; - /// Whether incomplete tokens are accepted. - bool accept_unfinished{false}; - /// Whether comments should be returned. - bool show_comments{false}; - /// Whether all blank lines are returned. - bool show_blank_lines{false}; - /// Whether to attempt to continue after an error. - bool continue_after_error{false}; - /// Whether to continue the previous line after the comment. - bool continue_line_after_comment{false}; - - tok_t call_error(tokenizer_error_t error_type, const wchar_t *token_start, - const wchar_t *error_loc, maybe_t token_length = {}, - size_t error_len = 0); - tok_t read_string(); - - public: - /// Constructor for a tokenizer. b is the string that is to be tokenized. It is not copied, and - /// should not be freed by the caller until after the tokenizer is destroyed. - /// - /// \param b The string to tokenize - /// \param flags Flags to the tokenizer. Setting TOK_ACCEPT_UNFINISHED will cause the tokenizer - /// to accept incomplete tokens, such as a subshell without a closing parenthesis, as a valid - /// token. Setting TOK_SHOW_COMMENTS will return comments as tokens - tokenizer_t(const wchar_t *start, tok_flags_t flags); - - /// Returns the next token, or none() if we are at the end. - maybe_t next(); - - /// Returns the text of a token, as a string. - wcstring text_of(const tok_t &tok) const { return wcstring(start + tok.offset, tok.length); } - - /// Copies a token's text into a string. This is useful for reusing storage. - /// Returns a reference to the string. - const wcstring ©_text_of(const tok_t &tok, wcstring *result) { - return result->assign(start + tok.offset, tok.length); - } -}; - -/// Tests if this character can delimit tokens. 
-bool is_token_delimiter(wchar_t c, maybe_t next); - -/// \return the first token from the string, skipping variable assignments like A=B. -wcstring tok_command(const wcstring &str); - -/// Struct wrapping up a parsed pipe or redirection. -struct pipe_or_redir_t { - // The redirected fd, or -1 on overflow. - // In the common case of a pipe, this is 1 (STDOUT_FILENO). - // For example, in the case of "3>&1" this will be 3. - int fd{-1}; - - // Whether we are a pipe (true) or redirection (false). - bool is_pipe{false}; - - // The redirection mode if the type is redirect. - // Ignored for pipes. - redirection_mode_t mode{redirection_mode_t::overwrite}; - - // Whether, in addition to this redirection, stderr should also be dup'd to stdout - // For example &| or &> - bool stderr_merge{false}; - - // Number of characters consumed when parsing the string. - size_t consumed{0}; - - // Construct from a string. - static maybe_t from_string(const wchar_t *buff); - static maybe_t from_string(const wcstring &buff) { - return from_string(buff.c_str()); - } - - // \return the oflags (as in open(2)) for this redirection. - int oflags() const; - - // \return if we are "valid". Here "valid" means only that the source fd did not overflow. - // For example 99999999999> is invalid. - bool is_valid() const { return fd >= 0; } - - // \return the token type for this redirection. - token_type_t token_type() const { - return is_pipe ? token_type_t::pipe : token_type_t::redirect; - } - - private: - pipe_or_redir_t(); -}; - -enum move_word_style_t { - move_word_style_punctuation, // stop at punctuation - move_word_style_path_components, // stops at path components - move_word_style_whitespace // stops at whitespace -}; - -/// Our state machine that implements "one word" movement or erasure. 
-class move_word_state_machine_t { - private: - bool consume_char_punctuation(wchar_t c); - bool consume_char_path_components(wchar_t c); - bool is_path_component_character(wchar_t c); - bool consume_char_whitespace(wchar_t c); - - int state; - move_word_style_t style; - - public: - explicit move_word_state_machine_t(move_word_style_t syl); - bool consume_char(wchar_t c); - void reset(); -}; - -/// The position of the equal sign in a variable assignment like foo=bar. -maybe_t variable_assignment_equals_pos(const wcstring &txt); +#endif #endif diff --git a/src/topic_monitor.cpp b/src/topic_monitor.cpp deleted file mode 100644 index 626d3eec6..000000000 --- a/src/topic_monitor.cpp +++ /dev/null @@ -1,283 +0,0 @@ -#include "config.h" // IWYU pragma: keep - -#include "topic_monitor.h" - -#include - -#include - -#include "flog.h" -#include "iothread.h" -#include "maybe.h" -#include "wcstringutil.h" -#include "wutil.h" - -wcstring generation_list_t::describe() const { - wcstring result; - for (generation_t gen : this->as_array()) { - if (!result.empty()) result.push_back(L','); - if (gen == invalid_generation) { - result.append(L"-1"); - } else { - result.append(to_string(gen)); - } - } - return result; -} - -binary_semaphore_t::binary_semaphore_t() : sem_ok_(false) { - // sem_init always fails with ENOSYS on Mac and has an annoying deprecation warning. - // On BSD sem_init uses a file descriptor under the hood which doesn't get CLOEXEC (see #7304). - // So use fast semaphores on Linux only. -#ifdef __linux__ - sem_ok_ = (0 == sem_init(&sem_, 0, 0)); -#endif - if (!sem_ok_) { - auto pipes = make_autoclose_pipes(); - assert(pipes.has_value() && "Failed to make pubsub pipes"); - pipes_ = pipes.acquire(); - - // Whoof. Thread Sanitizer swallows signals and replays them at its leisure, at the point - // where instrumented code makes certain blocking calls. 
But tsan cannot interrupt a signal - // call, so if we're blocked in read() (like the topic monitor wants to be!), we'll never - // receive SIGCHLD and so deadlock. So if tsan is enabled, we mark our fd as non-blocking - // (so reads will never block) and use select() to poll it. -#ifdef FISH_TSAN_WORKAROUNDS - DIE_ON_FAILURE(make_fd_nonblocking(pipes_.read.fd())); -#endif - } -} - -binary_semaphore_t::~binary_semaphore_t() { - // We never use sem_t on Mac. The #ifdef avoids deprecation warnings. -#ifndef __APPLE__ - if (sem_ok_) (void)sem_destroy(&sem_); -#endif -} - -void binary_semaphore_t::die(const wchar_t *msg) const { - wperror(msg); - DIE("unexpected failure"); -} - -void binary_semaphore_t::post() { - if (sem_ok_) { - int res = sem_post(&sem_); - // sem_post is non-interruptible. - if (res < 0) die(L"sem_post"); - } else { - // Write exactly one byte. - ssize_t ret; - do { - const uint8_t v = 0; - ret = write(pipes_.write.fd(), &v, sizeof v); - } while (ret < 0 && errno == EINTR); - if (ret < 0) die(L"write"); - } -} - -void binary_semaphore_t::wait() { - if (sem_ok_) { - int res; - do { - res = sem_wait(&sem_); - } while (res < 0 && errno == EINTR); - // Other errors here are very unexpected. - if (res < 0) die(L"sem_wait"); - } else { - int fd = pipes_.read.fd(); - // We must read exactly one byte. - for (;;) { -#ifdef FISH_TSAN_WORKAROUNDS - // Under tsan our notifying pipe is non-blocking, so we would busy-loop on the read() - // call until data is available (that is, fish would use 100% cpu while waiting for - // processes). This call prevents that. - (void)fd_readable_set_t::is_fd_readable(fd, fd_readable_set_t::kNoTimeout); -#endif - uint8_t ignored; - auto amt = read(fd, &ignored, sizeof ignored); - if (amt == 1) break; - // EAGAIN should only be returned in TSan case. - if (amt < 0 && errno != EINTR && errno != EAGAIN && errno != EWOULDBLOCK) die(L"read"); - } - } -} - -/// Implementation of the principal monitor. 
This uses new (and leaks) to avoid registering a -/// pointless at-exit handler for the dtor. -static topic_monitor_t *const s_principal = new topic_monitor_t(); - -topic_monitor_t &topic_monitor_t::principal() { - // Do not attempt to move s_principal to a function-level static, it needs to be accessed from a - // signal handler so it must not be lazily created. - return *s_principal; -} - -topic_monitor_t::topic_monitor_t() = default; -topic_monitor_t::~topic_monitor_t() = default; - -void topic_monitor_t::post(topic_t topic) { - // Beware, we may be in a signal handler! - // Atomically update the pending topics. - const uint8_t topicbit = topic_to_bit(topic); - - // CAS in our bit, capturing the old status value. - status_bits_t oldstatus; - bool cas_success = false; - while (!cas_success) { - oldstatus = status_.load(std::memory_order_relaxed); - // Clear wakeup bit and set our topic bit. - status_bits_t newstatus = oldstatus; - newstatus &= ~STATUS_NEEDS_WAKEUP; - newstatus |= topicbit; - cas_success = status_.compare_exchange_weak(oldstatus, newstatus); - } - // Note that if the STATUS_NEEDS_WAKEUP bit is set, no other bits must be set. - assert(((oldstatus == STATUS_NEEDS_WAKEUP) == bool(oldstatus & STATUS_NEEDS_WAKEUP)) && - "If STATUS_NEEDS_WAKEUP is set no other bits should be set"); - - // If the bit was already set, then someone else posted to this topic and nobody has reacted to - // it yet. In that case we're done. - if (oldstatus & topicbit) { - return; - } - - // We set a new bit. - // Check if we should wake up a thread because it was waiting. - if (oldstatus & STATUS_NEEDS_WAKEUP) { - std::atomic_thread_fence(std::memory_order_release); - sema_.post(); - } -} - -generation_list_t topic_monitor_t::updated_gens_in_data(acquired_lock &data) { - // Atomically acquire the pending updates, swapping in 0. - // If there are no pending updates (likely) or a thread is waiting, just return. - // Otherwise CAS in 0 and update our topics. 
- const auto relaxed = std::memory_order_relaxed; - topic_bitmask_t changed_topic_bits; - bool cas_success; - do { - changed_topic_bits = status_.load(relaxed); - if (changed_topic_bits == 0 || changed_topic_bits == STATUS_NEEDS_WAKEUP) - return data->current; - cas_success = status_.compare_exchange_weak(changed_topic_bits, 0); - } while (!cas_success); - assert((changed_topic_bits & STATUS_NEEDS_WAKEUP) == 0 && - "Thread waiting bit should not be set"); - - // Update the current generation with our topics and return it. - for (topic_t topic : all_topics()) { - if (changed_topic_bits & topic_to_bit(topic)) { - data->current.at(topic) += 1; - FLOG(topic_monitor, "Updating topic", static_cast(topic), "to", - data->current.at(topic)); - } - } - // Report our change. - data_notifier_.notify_all(); - return data->current; -} - -generation_list_t topic_monitor_t::updated_gens() { - auto data = data_.acquire(); - return updated_gens_in_data(data); -} - -bool topic_monitor_t::try_update_gens_maybe_becoming_reader(generation_list_t *gens) { - bool become_reader = false; - auto data = data_.acquire(); - for (;;) { - // See if the updated gen list has changed. If so we don't need to become the reader. - auto current = updated_gens_in_data(data); - FLOG(topic_monitor, "TID", thread_id(), "local ", gens->describe(), ": current", - current.describe()); - if (*gens != current) { - *gens = current; - break; - } - - // The generations haven't changed. Perhaps we become the reader. - // Note we still hold the lock, so this cannot race with any other thread becoming the - // reader. - if (data->has_reader) { - // We already have a reader, wait for it to notify us and loop again. - data_notifier_.wait(data.get_lock()); - continue; - } else { - // We will try to become the reader. - // Reader bit should not be set in this case. - assert((status_.load() & STATUS_NEEDS_WAKEUP) == 0 && "No thread should be waiting"); - // Try becoming the reader by marking the reader bit. 
- status_bits_t expected_old = 0; - if (!status_.compare_exchange_strong(expected_old, STATUS_NEEDS_WAKEUP)) { - // We failed to become the reader, perhaps because another topic post just arrived. - // Loop again. - continue; - } - // We successfully did a CAS from 0 -> STATUS_NEEDS_WAKEUP. - // Now any successive topic post must signal us. - FLOG(topic_monitor, "TID", thread_id(), "becoming reader"); - become_reader = true; - data->has_reader = true; - break; - } - } - return become_reader; -} - -generation_list_t topic_monitor_t::await_gens(const generation_list_t &input_gens) { - generation_list_t gens = input_gens; - while (gens == input_gens) { - bool become_reader = try_update_gens_maybe_becoming_reader(&gens); - if (become_reader) { - // Now we are the reader. Read from the pipe, and then update with any changes. - // Note we no longer hold the lock. - assert(gens == input_gens && - "Generations should not have changed if we are the reader."); - - // Wait to be woken up. - sema_.wait(); - - // We are finished waiting. We must stop being the reader, and post on the condition - // variable to wake up any other threads waiting for us to finish reading. - auto data = data_.acquire(); - gens = data->current; - FLOG(topic_monitor, "TID", thread_id(), "local", input_gens.describe(), - "read() complete, current is", gens.describe()); - assert(data->has_reader && "We should be the reader"); - data->has_reader = false; - data_notifier_.notify_all(); - } - } - return gens; -} - -bool topic_monitor_t::check(generation_list_t *gens, bool wait) { - if (!gens->any_valid()) return false; - - generation_list_t current = updated_gens(); - bool changed = false; - for (;;) { - // Load the topic list and see if anything has changed. 
- for (topic_t topic : all_topics()) { - if (gens->is_valid(topic)) { - assert(gens->at(topic) <= current.at(topic) && - "Incoming gen count exceeded published count"); - if (gens->at(topic) < current.at(topic)) { - gens->at(topic) = current.at(topic); - changed = true; - } - } - } - - // If we're not waiting, or something changed, then we're done. - if (!wait || changed) { - break; - } - - // Wait until our gens change. - current = await_gens(current); - } - return changed; -} diff --git a/src/topic_monitor.h b/src/topic_monitor.h index adc54f5e4..f62cb9499 100644 --- a/src/topic_monitor.h +++ b/src/topic_monitor.h @@ -1,259 +1,25 @@ #ifndef FISH_TOPIC_MONITOR_H #define FISH_TOPIC_MONITOR_H -#include +#include "config.h" -#include -#include -#include // IWYU pragma: keep -#include -#include -#include +#include -#include "common.h" -#include "fds.h" - -/** Topic monitoring support. Topics are conceptually "a thing that can happen." For example, - delivery of a SIGINT, a child process exits, etc. It is possible to post to a topic, which means - that that thing happened. - - Associated with each topic is a current generation, which is a 64 bit value. When you query a - topic, you get back a generation. If on the next query the generation has increased, then it - indicates someone posted to the topic. - - For example, if you are monitoring a child process, you can query the sigchld topic. If it has - increased since your last query, it is possible that your child process has exited. - - Topic postings may be coalesced. That is there may be two posts to a given topic, yet the - generation only increases by 1. The only guarantee is that after a topic post, the current - generation value is larger than any value previously queried. - - Tying this all together is the topic_monitor_t. This provides the current topic generations, and - also provides the ability to perform a blocking wait for any topic to change in a particular topic - set. 
This is the real power of topics: you can wait for a sigchld signal OR a thread exit. - */ - -/// A generation is a counter incremented every time the value of a topic changes. -/// It is 64 bit so it will never wrap. using generation_t = uint64_t; -/// A generation value which indicates the topic is not of interest. -constexpr generation_t invalid_generation = std::numeric_limits::max(); +#if INCLUDE_RUST_HEADERS -/// The list of topics which may be observed. -enum class topic_t : uint8_t { - sighupint, // Corresponds to both SIGHUP and SIGINT signals. - sigchld, // Corresponds to SIGCHLD signal. - internal_exit, // Corresponds to an internal process exit. -}; +#include "topic_monitor.rs.h" -/// Helper to return all topics, allowing easy iteration. -inline std::array all_topics() { - return {{topic_t::sighupint, topic_t::sigchld, topic_t::internal_exit}}; -} +#else -/// Simple value type containing the values for a topic. -/// This should be kept in sync with topic_t. -class generation_list_t { - public: - generation_list_t() = default; - - generation_t sighupint{0}; - generation_t sigchld{0}; - generation_t internal_exit{0}; - - /// \return the value for a topic. - generation_t &at(topic_t topic) { - switch (topic) { - case topic_t::sigchld: - return sigchld; - case topic_t::sighupint: - return sighupint; - case topic_t::internal_exit: - return internal_exit; - } - DIE("Unreachable"); - } - - generation_t at(topic_t topic) const { - switch (topic) { - case topic_t::sighupint: - return sighupint; - case topic_t::sigchld: - return sigchld; - case topic_t::internal_exit: - return internal_exit; - } - DIE("Unreachable"); - } - - /// \return ourselves as an array. - std::array as_array() const { return {{sighupint, sigchld, internal_exit}}; } - - /// Set the value of \p topic to the smaller of our value and the value in \p other. 
- void set_min_from(topic_t topic, const generation_list_t &other) { - if (this->at(topic) > other.at(topic)) { - this->at(topic) = other.at(topic); - } - } - - /// \return whether a topic is valid. - bool is_valid(topic_t topic) const { return this->at(topic) != invalid_generation; } - - /// \return whether any topic is valid. - bool any_valid() const { - bool valid = false; - for (auto gen : as_array()) { - if (gen != invalid_generation) valid = true; - } - return valid; - } - - bool operator==(const generation_list_t &rhs) const { - return sighupint == rhs.sighupint && sigchld == rhs.sigchld && - internal_exit == rhs.internal_exit; - } - - bool operator!=(const generation_list_t &rhs) const { return !(*this == rhs); } - - /// return a string representation for debugging. - wcstring describe() const; - - /// Generation list containing invalid generations only. - static generation_list_t invalids() { - return generation_list_t(invalid_generation, invalid_generation, invalid_generation); - } - - private: - generation_list_t(generation_t sighupint, generation_t sigchld, generation_t internal_exit) - : sighupint(sighupint), sigchld(sigchld), internal_exit(internal_exit) {} -}; - -/// A simple binary semaphore. -/// On systems that do not support unnamed semaphores (macOS in particular) this is built on top of -/// a self-pipe. Note that post() must be async-signal safe. -class binary_semaphore_t { - public: - binary_semaphore_t(); - ~binary_semaphore_t(); - - /// Release a waiting thread. - void post(); - - /// Wait for a post. - /// This loops on EINTR. - void wait(); - - private: - // Print a message and exit. - void die(const wchar_t *msg) const; - - // Whether our semaphore was successfully initialized. - bool sem_ok_{}; - - // The semaphore, if initialized. - sem_t sem_{}; - - // Pipes used to emulate a semaphore, if not initialized. - autoclose_pipes_t pipes_{}; -}; - -/// The topic monitor class. 
This permits querying the current generation values for topics, -/// optionally blocking until they increase. -/// What we would like to write is that we have a set of topics, and threads wait for changes on a -/// condition variable which is tickled in post(). But this can't work because post() may be called -/// from a signal handler and condition variables are not async-signal safe. -/// So instead the signal handler announces changes via a binary semaphore. -/// In the wait case, what generally happens is: -/// A thread fetches the generations, see they have not changed, and then decides to try to wait. -/// It does so by atomically swapping in STATUS_NEEDS_WAKEUP to the status bits. -/// If that succeeds, it waits on the binary semaphore. The post() call will then wake the thread -/// up. If if failed, then either a post() call updated the status values (so perhaps there is a -/// new topic post) or some other thread won the race and called wait() on the semaphore. Here our -/// thread will wait on the data_notifier_ queue. -class topic_monitor_t : noncopyable_t, nonmovable_t { - private: - using topic_bitmask_t = uint8_t; - - // Some stuff that needs to be protected by the same lock. - struct data_t { - /// The current values. - generation_list_t current{}; - - /// A flag indicating that there is a current reader. - /// The 'reader' is responsible for calling sema_.wait(). - bool has_reader{false}; - }; - owning_lock data_{}; - - /// Condition variable for broadcasting notifications. - /// This is associated with data_'s mutex. - std::condition_variable data_notifier_{}; - - /// A status value which describes our current state, managed via atomics. - /// Three possibilities: - /// 0: no changed topics, no thread is waiting. - /// 128: no changed topics, some thread is waiting and needs wakeup. - /// anything else: some changed topic, no thread is waiting. - /// Note that if the msb is set (status == 128) no other bit may be set. 
- using status_bits_t = uint8_t; - std::atomic status_{}; - - /// Sentinel status value indicating that a thread is waiting and needs a wakeup. - /// Note it is an error for this bit to be set and also any topic bit. - static constexpr uint8_t STATUS_NEEDS_WAKEUP = 128; - - /// Binary semaphore used to communicate changes. - /// If status_ is STATUS_NEEDS_WAKEUP, then a thread has commited to call wait() on our sema and - /// this must be balanced by the next call to post(). Note only one thread may wait at a time. - binary_semaphore_t sema_{}; - - /// Apply any pending updates to the data. - /// This accepts data because it must be locked. - /// \return the updated generation list. - generation_list_t updated_gens_in_data(acquired_lock &data); - - /// Given a list of input generations, attempt to update them to something newer. - /// If \p gens is older, then just return those by reference, and directly return false (not - /// becoming the reader). - /// If \p gens is current and there is not a reader, then do not update \p gens and return true, - /// indicating we should become the reader. Now it is our responsibility to wait on the - /// semaphore and notify on a change via the condition variable. If \p gens is current, and - /// there is already a reader, then wait until the reader notifies us and try again. - bool try_update_gens_maybe_becoming_reader(generation_list_t *gens); - - /// Wait for some entry in the list of generations to change. - /// \return the new gens. - generation_list_t await_gens(const generation_list_t &input_gens); - - /// \return the current generation list, opportunistically applying any pending updates. - generation_list_t updated_gens(); - - /// Helper to convert a topic to a bitmask containing just that topic. - static topic_bitmask_t topic_to_bit(topic_t t) { return 1 << static_cast(t); } - - public: - topic_monitor_t(); - ~topic_monitor_t(); - - /// The principal topic_monitor. This may be fetched from a signal handler. 
- static topic_monitor_t &principal(); - - /// Post to a topic, potentially from a signal handler. - void post(topic_t topic); - - /// Access the current generations. - generation_list_t current_generations() { return updated_gens(); } - - /// Access the generation for a topic. - generation_t generation_for_topic(topic_t topic) { return current_generations().at(topic); } - - /// For each valid topic in \p gens, check to see if the current topic is larger than - /// the value in \p gens. - /// If \p wait is set, then wait if there are no changes; otherwise return immediately. - /// \return true if some topic changed, false if none did. - /// On a true return, this updates the generation list \p gens. - bool check(generation_list_t *gens, bool wait); +// Hacks to allow us to compile without Rust headers. +struct generation_list_t { + uint64_t sighupint; + uint64_t sigchld; + uint64_t internal_exit; }; #endif + +#endif diff --git a/src/util.cpp b/src/util.cpp deleted file mode 100644 index fae9d9e15..000000000 --- a/src/util.cpp +++ /dev/null @@ -1,199 +0,0 @@ -// Generic utilities library. -#include "config.h" // IWYU pragma: keep - -#include "util.h" - -#include -#include -#include - -#include - -#include "common.h" -#include "fallback.h" // IWYU pragma: keep -#include "wutil.h" // IWYU pragma: keep - -// Compare the strings to see if they begin with an integer that can be compared and return the -// result of that comparison. -static int wcsfilecmp_leading_digits(const wchar_t **a, const wchar_t **b) { - const wchar_t *a1 = *a; - const wchar_t *b1 = *b; - - // Ignore leading 0s. - while (*a1 == L'0') a1++; - while (*b1 == L'0') b1++; - - int ret = 0; - - while (true) { - if (iswdigit(*a1) && iswdigit(*b1)) { - // We keep the cmp value for the - // first differing digit. - // - // If the numbers have the same length, that's the value. - if (ret == 0) { - // Comparing the string value is the same as numerical - // for wchar_t digits! 
- if (*a1 > *b1) ret = 1; - if (*b1 > *a1) ret = -1; - } - } else { - // We don't have negative numbers and we only allow ints, - // and we have already skipped leading zeroes, - // so the longer number is larger automatically. - if (iswdigit(*a1)) ret = 1; - if (iswdigit(*b1)) ret = -1; - break; - } - a1++; - b1++; - } - - // For historical reasons, we skip trailing whitespace - // like fish_wcstol does! - // This is used in sorting globs, and that's supposed to be stable. - while (iswspace(*a1)) a1++; - while (iswspace(*b1)) b1++; - *a = a1; - *b = b1; - return ret; -} - -/// Compare two strings, representing file names, using "natural" ordering. This means that letter -/// case is ignored. It also means that integers in each string are compared based on the decimal -/// value rather than the string representation. It only handles base 10 integers and they can -/// appear anywhere in each string, including multiple integers. This means that a file name like -/// "0xAF0123" is treated as the literal "0xAF" followed by the integer 123. -/// -/// The intent is to ensure that file names like "file23" and "file5" are sorted so that the latter -/// appears before the former. -/// -/// This does not handle esoterica like Unicode combining characters. Nor does it use collating -/// sequences. Which means that an ASCII "A" will be less than an equivalent character with a higher -/// Unicode code point. In part because doing so is really hard without the help of something like -/// the ICU library. But also because file names might be in a different encoding than is used by -/// the current fish process which results in weird situations. This is basically a best effort -/// implementation that will do the right thing 99.99% of the time. -/// -/// Returns: -1 if a < b, 0 if a == b, 1 if a > b. 
-int wcsfilecmp(const wchar_t *a, const wchar_t *b) { - assert(a && b && "Null parameter"); - const wchar_t *orig_a = a; - const wchar_t *orig_b = b; - int retval = 0; // assume the strings will be equal - - while (*a && *b) { - if (iswdigit(*a) && iswdigit(*b)) { - retval = wcsfilecmp_leading_digits(&a, &b); - // If we know the strings aren't logically equal or we've reached the end of one or both - // strings we can stop iterating over the chars in each string. - if (retval || *a == 0 || *b == 0) break; - } - - // Fast path: Skip towupper. - if (*a == *b) { - a++; - b++; - continue; - } - - wint_t al = towupper(*a); - wint_t bl = towupper(*b); - // Sort dashes after Z - see #5634 - if (al == L'-') al = L'['; - if (bl == L'-') bl = L'['; - - if (al < bl) { - retval = -1; - break; - } else if (al > bl) { - retval = 1; - break; - } else { - a++; - b++; - } - } - - if (retval != 0) return retval; // we already know the strings aren't logically equal - - if (*a == 0) { - if (*b == 0) { - // The strings are logically equal. They may or may not be the same length depending on - // whether numbers were present but that doesn't matter. Disambiguate strings that - // differ by letter case or length. We don't bother optimizing the case where the file - // names are literally identical because that won't occur given how this function is - // used. And even if it were to occur (due to being reused in some other context) it - // would be so rare that it isn't worth optimizing for. - retval = std::wcscmp(orig_a, orig_b); - return retval < 0 ? -1 : retval == 0 ? 0 : 1; - } - return -1; // string a is a prefix of b and b is longer - } - - assert(*b == 0); - return 1; // string b is a prefix of a and a is longer -} - -/// wcsfilecmp, but frozen in time for glob usage. 
-int wcsfilecmp_glob(const wchar_t *a, const wchar_t *b) { - assert(a && b && "Null parameter"); - const wchar_t *orig_a = a; - const wchar_t *orig_b = b; - int retval = 0; // assume the strings will be equal - - while (*a && *b) { - if (iswdigit(*a) && iswdigit(*b)) { - retval = wcsfilecmp_leading_digits(&a, &b); - // If we know the strings aren't logically equal or we've reached the end of one or both - // strings we can stop iterating over the chars in each string. - if (retval || *a == 0 || *b == 0) break; - } - - // Fast path: Skip towlower. - if (*a == *b) { - a++; - b++; - continue; - } - - wint_t al = towlower(*a); - wint_t bl = towlower(*b); - if (al < bl) { - retval = -1; - break; - } else if (al > bl) { - retval = 1; - break; - } else { - a++; - b++; - } - } - - if (retval != 0) return retval; // we already know the strings aren't logically equal - - if (*a == 0) { - if (*b == 0) { - // The strings are logically equal. They may or may not be the same length depending on - // whether numbers were present but that doesn't matter. Disambiguate strings that - // differ by letter case or length. We don't bother optimizing the case where the file - // names are literally identical because that won't occur given how this function is - // used. And even if it were to occur (due to being reused in some other context) it - // would be so rare that it isn't worth optimizing for. - retval = wcscmp(orig_a, orig_b); - return retval < 0 ? -1 : retval == 0 ? 0 : 1; - } - return -1; // string a is a prefix of b and b is longer - } - - assert(*b == 0); - return 1; // string b is a prefix of a and a is longer -} - -/// Return microseconds since the epoch. -long long get_time() { - struct timeval time_struct; - gettimeofday(&time_struct, nullptr); - return 1000000LL * time_struct.tv_sec + time_struct.tv_usec; -} diff --git a/src/util.h b/src/util.h index 5cfb71270..fcb9996b8 100644 --- a/src/util.h +++ b/src/util.h @@ -1,40 +1,17 @@ -// Generic utilities library. 
#ifndef FISH_UTIL_H #define FISH_UTIL_H -/// Compares two wide character strings with an (arguably) intuitive ordering. This function tries -/// to order strings in a way which is intuitive to humans with regards to sorting strings -/// containing numbers. -/// -/// Most sorting functions would sort the strings 'file1.txt' 'file5.txt' and 'file12.txt' as: -/// -/// file1.txt -/// file12.txt -/// file5.txt -/// -/// This function regards any sequence of digits as a single entity when performing comparisons, so -/// the output is instead: -/// -/// file1.txt -/// file5.txt -/// file12.txt -/// -/// Which most people would find more intuitive. -/// -/// This won't return the optimum results for numbers in bases higher than ten, such as hexadecimal, -/// but at least a stable sort order will result. -/// -/// This function performs a two-tiered sort, where difference in case and in number of leading -/// zeroes in numbers only have effect if no other differences between strings are found. This way, -/// a 'file1' and 'File1' will not be considered identical, and hence their internal sort order is -/// not arbitrary, but the names 'file1', 'File2' and 'file3' will still be sorted in the order -/// given above. +#if INCLUDE_RUST_HEADERS + +#include "util.rs.h" + +#else + +// Hacks to allow us to compile without Rust headers. int wcsfilecmp(const wchar_t *a, const wchar_t *b); - -/// wcsfilecmp, but frozen in time for glob usage. int wcsfilecmp_glob(const wchar_t *a, const wchar_t *b); - -/// Get the current time in microseconds since Jan 1, 1970. long long get_time(); #endif + +#endif diff --git a/src/wait_handle.cpp b/src/wait_handle.cpp index 765419151..9d2c17252 100644 --- a/src/wait_handle.cpp +++ b/src/wait_handle.cpp @@ -40,6 +40,12 @@ void wait_handle_store_t::remove_by_pid(pid_t pid) { } } +wait_handle_ref_t wait_handle_store_t::get(size_t idx) const { + // TODO: this is O(N)! 
+ assert(idx < handles_.size() && "index out of range"); + return *std::next(std::begin(handles_), idx); +} + wait_handle_ref_t wait_handle_store_t::get_by_pid(pid_t pid) const { auto iter = handle_map_.find(pid); if (iter == handle_map_.end()) return nullptr; diff --git a/src/wait_handle.h b/src/wait_handle.h index a040330dd..421e0c028 100644 --- a/src/wait_handle.h +++ b/src/wait_handle.h @@ -37,6 +37,11 @@ struct wait_handle_t { /// Set to true when the process is completed. bool completed{false}; + + /// Autocxx junk. + bool is_completed() const { return completed; } + int get_pid() const { return pid; } + const wcstring &get_base_name() const { return base_name; } }; using wait_handle_ref_t = std::shared_ptr; @@ -70,6 +75,9 @@ class wait_handle_store_t : noncopyable_t { /// Get the list of all wait handles. const wait_handle_list_t &get_list() const { return handles_; } + /// autocxx does not support std::list so allow accessing by index. + wait_handle_ref_t get(size_t idx) const; + /// Convenience to return the size, for testing. size_t size() const { return handles_.size(); } diff --git a/src/wildcard.cpp b/src/wildcard.cpp index 2229be287..21d1282bb 100644 --- a/src/wildcard.cpp +++ b/src/wildcard.cpp @@ -53,7 +53,7 @@ bool wildcard_has_internal(const wchar_t *s, size_t len) { bool wildcard_has(const wchar_t *str, size_t len) { assert(str != nullptr); const wchar_t *end = str + len; - bool qmark_is_wild = !feature_test(features_t::qmark_noglob); + bool qmark_is_wild = !feature_test(feature_flag_t::qmark_noglob); // Fast check for * or ?; if none there is no wildcard. // Note some strings contain * but no wildcards, e.g. if they are quoted. 
if (std::find(str, end, L'*') == end && (!qmark_is_wild || std::find(str, end, L'?') == end)) { diff --git a/src/wutil.cpp b/src/wutil.cpp index bf8f5e436..77d3f0489 100644 --- a/src/wutil.cpp +++ b/src/wutil.cpp @@ -251,10 +251,10 @@ int wunlink(const wcstring &file_name) { return unlink(tmp.c_str()); } -void wperror(const wchar_t *s) { +void wperror(wcharz_t s) { int e = errno; - if (s[0] != L'\0') { - std::fwprintf(stderr, L"%ls: ", s); + if (s.str[0] != L'\0') { + std::fwprintf(stderr, L"%ls: ", s.str); } std::fwprintf(stderr, L"%s\n", std::strerror(e)); } @@ -515,6 +515,8 @@ const wcstring &wgettext(const wchar_t *in) { return val; } +const wchar_t *wgettext_ptr(const wchar_t *in) { return wgettext(in).c_str(); } + int wmkdir(const wcstring &name, int mode) { cstring name_narrow = wcs2string(name); return mkdir(name_narrow.c_str(), mode); diff --git a/src/wutil.h b/src/wutil.h index a0565faee..a90504bb7 100644 --- a/src/wutil.h +++ b/src/wutil.h @@ -11,7 +11,7 @@ #include #ifdef __APPLE__ // This include is required on macOS 10.10 for locale_t -#include // IWYU pragma: keep +#include // IWYU pragma: keep #endif #include @@ -24,6 +24,19 @@ #include "common.h" #include "maybe.h" +/// A POD wrapper around a null-terminated string, for ffi purposes. +/// This trivial type may be converted to and from const wchar_t *. +struct wcharz_t { + const wchar_t *str; + + /* implicit */ wcharz_t(const wchar_t *s) : str(s) {} + operator const wchar_t *() const { return str; } + operator wcstring() const { return str; } + + inline size_t size() const { return wcslen(str); } + inline size_t length() const { return size(); } +}; + class autoclose_fd_t; /// Wide character version of opendir(). Note that opendir() is guaranteed to set close-on-exec by @@ -43,7 +56,7 @@ int waccess(const wcstring &file_name, int mode); int wunlink(const wcstring &file_name); /// Wide character version of perror(). 
-void wperror(const wchar_t *s); +void wperror(wcharz_t s); /// Wide character version of getcwd(). wcstring wgetcwd(); @@ -78,6 +91,7 @@ std::wstring wbasename(std::wstring path); /// and bindtextdomain functions. This should probably be moved out of wgettext, so that wgettext /// will be nothing more than a wrapper around gettext, like all other functions in this file. const wcstring &wgettext(const wchar_t *in); +const wchar_t *wgettext_ptr(const wchar_t *in); /// Wide character version of mkdir. int wmkdir(const wcstring &name, int mode); @@ -174,6 +188,7 @@ class dir_iter_t : noncopyable_t { private: /// Whether this dir_iter considers the "." and ".." filesystem entries. bool withdot_{false}; + public: struct entry_t;