diff --git a/CMakeLists.txt b/CMakeLists.txt index 51805628f..2e90030bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,7 @@ set(FISH_BUILTIN_SRCS src/builtins/disown.cpp src/builtins/echo.cpp src/builtins/emit.cpp src/builtins/eval.cpp src/builtins/exit.cpp src/builtins/fg.cpp src/builtins/function.cpp src/builtins/functions.cpp src/builtins/history.cpp - src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/printf.cpp + src/builtins/jobs.cpp src/builtins/math.cpp src/builtins/printf.cpp src/builtins/path.cpp src/builtins/pwd.cpp src/builtins/random.cpp src/builtins/read.cpp src/builtins/realpath.cpp src/builtins/return.cpp src/builtins/set.cpp src/builtins/set_color.cpp src/builtins/source.cpp src/builtins/status.cpp diff --git a/doc_src/cmds/path.rst b/doc_src/cmds/path.rst new file mode 100644 index 000000000..4a5da7cb1 --- /dev/null +++ b/doc_src/cmds/path.rst @@ -0,0 +1,402 @@ +.. _cmd-path: + +path - manipulate and check paths +================================= + +Synopsis +-------- + +:: + + path basename GENERAL_OPTIONS [PATH ...] + path dirname GENERAL_OPTIONS [PATH ...] + path extension GENERAL_OPTIONS [PATH ...] + path filter GENERAL_OPTIONS [-v | --invert] + [-d] [-f] [-l] [-r] [-w] [-x] \ + [(-t | --type) TYPE] [(-p | --perm) PERMISSION] [PATH ...] + path is GENERAL_OPTIONS [(-v | --invert)] [(-t | --type) TYPE] + [-d] [-f] [-l] [-r] [-w] [-x] + [(-p | --perm) PERMISSION] [PATH ...] + path normalize GENERAL_OPTIONS [PATH ...] + path resolve GENERAL_OPTIONS [PATH ...] + path change-extension GENERAL_OPTIONS EXTENSION [PATH ...] + path sort GENERAL_OPTIONS [-r | --reverse] + [-u | --unique] [--key=basename|dirname|path] [PATH ...] + + GENERAL_OPTIONS := [-z | --null-in] [-Z | --null-out] [-q | --quiet] + +Description +----------- + +``path`` performs operations on paths. 
+ +PATH arguments are taken from the command line unless standard input is connected to a pipe or a file, in which case they are read from standard input, one PATH per line. It is an error to supply PATH arguments on both the command line and on standard input. + +Arguments starting with ``-`` are normally interpreted as switches; ``--`` causes the following arguments not to be treated as switches even if they begin with ``-``. Switches and required arguments are recognized only on the command line. + +When a path starts with ``-``, ``path filter`` and ``path normalize`` will prepend ``./`` on output to avoid it being interpreted as an option otherwise, so it's safe to pass path's output to other commands that can handle relative paths. + +All subcommands accept a ``-q`` or ``--quiet`` switch, which suppresses the usual output but exits with the documented status. In this case these commands will quit early, without reading all of the available input. + +All subcommands also accept a ``-Z`` or ``--null-out`` switch, which makes them print output separated with NUL instead of newlines. This is for further processing, e.g. passing to another ``path``, or ``xargs -0``. This is not recommended when the output goes to the terminal or a command substitution. + +All subcommands also accept a ``-z`` or ``--null-in`` switch, which makes them accept arguments from stdin separated with NULL-bytes. Since Unix paths can't contain NULL, that makes it possible to handle all possible paths and read input from e.g. ``find -print0``. If arguments are given on the commandline this has no effect. This should mostly be unnecessary since ``path`` automatically starts splitting on NULL if one appears in the first PATH_MAX bytes, PATH_MAX being the operating system's maximum length for a path plus a NULL byte. + +Some subcommands operate on the paths as strings and so work on nonexistent paths, while others need to access the paths themselves and so filter out nonexistent paths. 
+ +The following subcommands are available. + +.. _cmd-path-basename: + +"basename" subcommand +--------------------- + +:: + + path basename [-z | --null-in] [-Z | --null-out] [-q | --quiet] [PATH ...] + +``path basename`` returns the last path component of the given path, by removing the directory prefix and removing trailing slashes. In other words, it is the part that is not the dirname. For files you might call it the "filename". + +It returns 0 if there was a basename, i.e. if the path wasn't empty or just slashes. + +Examples +^^^^^^^^ + +:: + + >_ path basename ./foo.mp4 + foo.mp4 + + >_ path basename ../banana + banana + + >_ path basename /usr/bin/ + bin + + >_ path basename /usr/bin/* + # This prints all files in /usr/bin/ + # A selection: + cp + fish + grep + rm + +"dirname" subcommand +-------------------- + +:: + + path dirname [-z | --null-in] [-Z | --null-out] [-q | --quiet] [PATH ...] + +``path dirname`` returns the dirname for the given path. This is the part before the last "/", discounting trailing slashes. In other words, it is the part that is not the basename (discounting superfluous slashes). + +It returns 0 if there was a dirname, i.e. if the path wasn't empty or just slashes. + +Examples +^^^^^^^^ + +:: + + >_ path dirname ./foo.mp4 + . + + >_ path dirname ../banana + .. + + >_ path dirname /usr/bin/ + /usr + +"extension" subcommand +----------------------- + +:: + + path extension [-z | --null-in] [-Z | --null-out] [-q | --quiet] [PATH ...] + +``path extension`` returns the extension of the given path. This is the part after (and including) the last ".", unless that "." followed a "/" or the basename is "." or "..", in which case there is no extension and an empty line is printed. + +If the filename ends in a ".", only a "." is printed. + +It returns 0 if there was an extension. 
+ +Examples +^^^^^^^^ + +:: + + >_ path extension ./foo.mp4 + .mp4 + + >_ path extension ../banana + # an empty line, status 1 + + >_ path extension ~/.config + # an empty line, status 1 + + >_ path extension ~/.config.d + .d + + >_ path extension ~/.config. + . + + >_ set -l path (path change-extension '' ./foo.mp4) + >_ set -l extension (path extension ./foo.mp4) + >_ echo $path$extension + # reconstructs the original path again. + ./foo.mp4 + +.. _cmd-path-filter: + +"filter" subcommand +-------------------- + +:: + + path filter [-z | --null-in] [-Z | --null-out] [-q | --quiet] \ + [-d] [-f] [-l] [-r] [-w] [-x] \ + [-v | --invert] [(-t | --type) TYPE] [(-p | --perm) PERMISSION] [PATH ...] + +``path filter`` returns all of the given paths that match the given checks. In all cases, the paths need to exist; nonexistent paths are always filtered out. + +The available filters are: + +- ``-t`` or ``--type`` with the options: "dir", "file", "link", "block", "char", "fifo" and "socket", in which case the path needs to be a directory, file, link, block device, character device, named pipe or socket, respectively. +- ``-d``, ``-f`` and ``-l`` are short for ``--type=dir``, ``--type=file`` and ``--type=link``, respectively. There are no shortcuts for the other types. + +- ``-p`` or ``--perm`` with the options: "read", "write", and "exec", as well as "suid", "sgid", "user" (referring to the path owner) and "group" (referring to the path's group), in which case the path needs to have all of the given permissions for the current user. +- ``-r``, ``-w`` and ``-x`` are short for ``--perm=read``, ``--perm=write`` and ``--perm=exec``, respectively. There are no shortcuts for the other permissions. + +Note that the path needs to be *any* of the given types, but have *all* of the given permissions. This is because having a path that is both writable and executable makes sense, but having a path that is both a directory and a file doesn't.
Links will count as the type of the linked-to file, so links to files count as files, links to directories count as directories. + +The filter options can either be given as multiple options, or comma-separated - ``path filter -t dir,file`` or ``path filter --type dir --type file`` are equivalent. + +With ``--invert``, the meaning of the filtering is inverted - any path that wouldn't pass (including by not existing) passes, and any path that would pass fails. + +When a path starts with ``-``, ``path filter`` will prepend ``./`` to avoid it being interpreted as an option otherwise. + +It returns 0 if at least one path passed the filter. + +``path is`` is shorthand for ``path filter -q``, i.e. just checking without producing output, see :ref:`The is subcommand <cmd-path-is>`. + +Examples +^^^^^^^^ + +:: + + >_ path filter /usr/bin /usr/argagagji + # The (hopefully) nonexistent argagagji is filtered implicitly: + /usr/bin + + >_ path filter --type file /usr/bin /usr/bin/fish + # Only fish is a file + /usr/bin/fish + + >_ path filter --type file,dir --perm exec,write /usr/bin/fish /home/me + # fish is a file, which passes, and executable, which passes, + # but probably not writable, which fails. + # + # $HOME is a directory and both writable and executable, typically. + # So it passes. + /home/me + + >_ path filter -fdxw /usr/bin/fish /home/me + # This is the same as above: "-f" is "--type=file", "-d" is "--type=dir", + # "-x" is short for "--perm=exec" and "-w" short for "--perm=write"! + /home/me + + >_ path filter -fx $PATH/* + # Prints all possible commands - the first entry of each name is what fish would execute! + +.. _cmd-path-is: + +"is" subcommand +-------------------- + +:: + + path is [-z | --null-in] [-Z | --null-out] [-q | --quiet] \ + [-d] [-f] [-l] [-r] [-w] [-x] \ + [-v | --invert] [(-t | --type) TYPE] [(-p | --perm) PERMISSION] [PATH ...] + +``path is`` is short for ``path filter -q``.
It returns true if any of the given files passes the filter, but does not produce any output. + +``--quiet`` can still be passed for compatibility but is redundant. The options are the same as for ``path filter``. + +Examples +^^^^^^^^ + +:: + + >_ path is /usr/bin /usr/argagagji + # /usr/bin exists, so this returns a status of 0 (true). It prints nothing. + >_ path is /usr/argagagji + # /usr/argagagji does not, so this returns a status of 1 (false). It also prints nothing. + >_ path is -fx /bin/sh + # /bin/sh is usually an executable file, so this returns true. + +"normalize" subcommand +----------------------- + +:: + + path normalize [-z | --null-in] [-Z | --null-out] [-q | --quiet] [PATH ...] + +``path normalize`` returns the normalized versions of all paths. That means it squashes duplicate "/" (except for two leading "//"), collapses "../" with earlier components and removes "." components. + +Unlike ``realpath`` or ``path resolve``, it does not make the paths absolute. It also does not resolve any symlinks. As such it can operate on non-existent paths. + +Leading "./" components are usually removed. But when a path starts with ``-``, ``path normalize`` will add it instead to avoid confusion with options. + +It returns 0 if any normalization was done, i.e. any given path wasn't in canonical form. + +Examples +^^^^^^^^ + +:: + + >_ path normalize /usr/bin//../../etc/fish + # The "//" is squashed and the ".." components neutralize the components before + /etc/fish + + >_ path normalize /bin//bash + # The "//" is squashed, but /bin isn't resolved even if your system links it to /usr/bin. + /bin/bash + + >_ path normalize ./my/subdirs/../sub2 + my/sub2 + + >_ path normalize -- -/foo + ./-/foo + +"resolve" subcommand +-------------------- + +:: + + path resolve [-z | --null-in] [-Z | --null-out] [-q | --quiet] [PATH ...] + +``path resolve`` returns the normalized, physical and absolute versions of all paths. 
That means it resolves symlinks and does what ``path normalize`` does: it squashes duplicate "/" (except for two leading "//"), collapses "../" with earlier components and removes "." components. Then it turns that path into the absolute path starting from the filesystem root "/". + +It is similar to ``realpath``, as it creates the "real", canonical version of the path. However, for paths that can't be resolved, e.g. if they don't exist or form a symlink loop, it will resolve as far as it can and normalize the rest. + +It returns 0 if any normalization or resolution was done, i.e. any given path wasn't in canonical form. + +Examples +^^^^^^^^ + +:: + + >_ path resolve /bin//sh + # The "//" is squashed, and /bin is resolved if your system links it to /usr/bin. + # sh here is bash (this is common on linux systems) + /usr/bin/bash + + >_ path resolve /bin/foo///bar/../baz + # Assuming /bin exists and is a symlink to /usr/bin, but /bin/foo doesn't. + # This resolves the /bin/ and normalizes the nonexistent rest: + /usr/bin/foo/baz + +"change-extension" subcommand +----------------------------- + +:: + + path change-extension [-z | --null-in] [-Z | --null-out] \ + [-q | --quiet] EXTENSION [PATH ...] + +``path change-extension`` returns the given paths, with their extension changed to the given new extension. The extension is the part after (and including) the last ".", unless that "." followed a "/" or the basename is "." or "..", in which case there is no previous extension and the new one is simply added. + +If the extension is empty, any previous extension is stripped, along with the ".". This is, of course, the inverse of ``path extension``. + +One leading dot on the extension is ignored, so ".mp3" and "mp3" are treated the same. + +It returns 0 if it was given any paths. 
+ +Examples +^^^^^^^^ + +:: + + >_ path change-extension mp4 ./foo.wmv + ./foo.mp4 + + >_ path change-extension .mp4 ./foo.wmv + ./foo.mp4 + + >_ path change-extension '' ../banana + ../banana + # but status 1, because there was no extension. + + >_ path change-extension '' ~/.config + /home/alfa/.config + # status 1 + + >_ path change-extension '' ~/.config.d + /home/alfa/.config + # status 0 + + >_ path change-extension '' ~/.config. + /home/alfa/.config + # status 0 + +"sort" subcommand +----------------------------- + +:: + + path sort [-z | --null-in] [-Z | --null-out] \ + [-q | --quiet] [-r | --reverse] [-u | --unique] \ + [--key=basename|dirname|path] [PATH ...] + + +``path sort`` returns the given paths in sorted order. They are sorted in the same order as globs - alphabetically, but with runs of numerical digits compared numerically. + +With ``--reverse`` or ``-r`` the sort is reversed. + +With ``--key=`` only the given part of the path is compared, e.g. ``--key=dirname`` causes only the dirname to be compared, ``--key=basename`` only the basename and ``--key=path`` causes the entire path to be compared (this is the default). + +With ``--unique`` or ``-u`` the sort is deduplicated, meaning only the first of each run of paths with the same key is kept. So if you are sorting by basename, then only the first of each basename is used. + +The sort used is stable, so sorting first by basename and then by dirname works and causes the files to be grouped according to directory. + +It currently returns 0 if it was given any paths. + +Examples +^^^^^^^^ + +:: + + >_ path sort 10-foo 2-bar + 2-bar + 10-foo + + >_ path sort --reverse 10-foo 2-bar + 10-foo + 2-bar + + >_ path sort --unique --key=basename $fish_function_path/*.fish + # prints a list of all function files fish would use, sorted by name. + + +Combining ``path`` +------------------- + +``path`` is meant to be easy to combine with itself, other tools and fish.
+ +This is why + +- ``path``'s output is automatically split by fish if it goes into a command substitution, so just doing ``(path ...)`` handles all paths, even those containing newlines, correctly +- ``path`` has ``--null-in`` to handle null-delimited input (typically automatically detected!), and ``--null-out`` to pass on null-delimited output + +Some examples of combining ``path``:: + + # Expand all paths in the current directory, leave only executable files, and print their resolved path + path filter -zZ -xf -- * | path resolve -z + + # The same thing, but using find (note -maxdepth needs to come first or find will scream) + # (this also depends on your particular version of find) + # Note the `-z` is unnecessary for any sensible version of find - if `path` sees a NULL, + # it will split on NULL automatically. + find . -maxdepth 1 -type f -executable -print0 | path resolve -z + + set -l paths (path filter -p exec $PATH/fish -Z | path resolve) diff --git a/doc_src/fish_for_bash_users.rst b/doc_src/fish_for_bash_users.rst index e9f014796..66bacac73 100644 --- a/doc_src/fish_for_bash_users.rst +++ b/doc_src/fish_for_bash_users.rst @@ -83,7 +83,7 @@ See :ref:`Shell variables ` for more. Wildcards (globs) ----------------- -Fish only supports the ``*`` and ``**`` glob (and the deprecated ``?`` glob). If a glob doesn't match it fails the command (like with bash's ``failglob``) unless the command is ``for``, ``set`` or ``count`` or the glob is used with an environment override (``VAR=* command``), in which case it expands to nothing (like with bash's ``nullglob`` option). +Fish only supports the ``*`` and ``**`` glob (and the deprecated ``?`` glob) as syntax. If a glob doesn't match it fails the command (like with bash's ``failglob``) unless the command is ``for``, ``set`` or ``count`` or the glob is used with an environment override (``VAR=* command``), in which case it expands to nothing (like with bash's ``nullglob`` option). 
Globbing doesn't happen on expanded variables, so:: diff --git a/doc_src/language.rst b/doc_src/language.rst index c868cd533..060e51cf2 100644 --- a/doc_src/language.rst +++ b/doc_src/language.rst @@ -475,7 +475,7 @@ Examples: - ``~/.*`` matches all hidden files (also known as "dotfiles") and directories in your home directory. -For most commands, if any wildcard fails to expand, the command is not executed, :ref:`$status ` is set to nonzero, and a warning is printed. This behavior is like what bash does with ``shopt -s failglob``. There are exactly 4 exceptions, namely :ref:`set `, overriding variables in :ref:`overrides `, :ref:`count ` and :ref:`for `. Their globs will instead expand to zero arguments (so the command won't see them at all), like with ``shopt -s nullglob`` in bash. +For most commands, if any wildcard fails to expand, the command is not executed, :ref:`$status ` is set to nonzero, and a warning is printed. This behavior is like what bash does with ``shopt -s failglob``. There are exceptions, namely :ref:`set ` and :ref:`path `, overriding variables in :ref:`overrides `, :ref:`count ` and :ref:`for `. Their globs will instead expand to zero arguments (so the command won't see them at all), like with ``shopt -s nullglob`` in bash. 
Examples:: diff --git a/share/completions/path.fish b/share/completions/path.fish new file mode 100644 index 000000000..a3810208c --- /dev/null +++ b/share/completions/path.fish @@ -0,0 +1,32 @@ +# Completion for builtin path +# This follows a strict command-then-options approach, so we can just test the number of tokens +complete -f -c path -n "test (count (commandline -opc)) -le 2" -s h -l help -d "Display help and exit" +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a basename -d 'Give basename for given paths' +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a dirname -d 'Give dirname for given paths' +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a extension -d 'Give extension for given paths' +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a change-extension -d 'Change extension for given paths' +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a normalize -d 'Normalize given paths (remove ./, resolve ../ against other components..)' +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a resolve -d 'Normalize given paths and resolve symlinks' +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a filter -d 'Print paths that match a filter' +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a is -d 'Return true if any path matched a filter' +complete -f -c path -n "test (count (commandline -opc)) -lt 2" -a sort -d 'Sort paths' +complete -f -c path -n "test (count (commandline -opc)) -ge 2" -s q -l quiet -d "Only return status, no output" +complete -f -c path -n "test (count (commandline -opc)) -ge 2" -s z -l null-in -d "Handle NULL-delimited input" +complete -f -c path -n "test (count (commandline -opc)) -ge 2" -s Z -l null-out -d "Print NULL-delimited output" +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s v -l invert -d "Invert meaning of filters" +complete -f -c path -n 
"test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s t -l type -d "Filter by type" -x -a '(__fish_append , file link dir block char fifo socket)' +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s f -d "Filter files" +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s d -d "Filter directories" +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s l -d "Filter symlinks" +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s p -l perm -d "Filter by permission" -x -a '(__fish_append , read write exec suid sgid user group)' +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s r -d "Filter readable paths" +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s w -d "Filter writable paths" +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] filter is" -s x -d "Filter executable paths" +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] sort" \ + -l key -x -a 'basename\t"Sort only by basename" dirname\t"Sort only by dirname" path\t"Sort by full path"' +complete -f -c path -n "test (count (commandline -opc)) -ge 2; and contains -- (commandline -opc)[2] sort" -s u -l unique -d 'Only leave the first of each run with the same key' + +# Turn on file completions again. +# match takes a glob as first arg, expand takes only globs. +# We still want files completed then! 
+complete -F -c path -n "test (count (commandline -opc)) -ge 2" diff --git a/src/builtin.cpp b/src/builtin.cpp index 2b35d3de0..f60ae17cc 100644 --- a/src/builtin.cpp +++ b/src/builtin.cpp @@ -50,6 +50,7 @@ #include "builtins/history.h" #include "builtins/jobs.h" #include "builtins/math.h" +#include "builtins/path.h" #include "builtins/printf.h" #include "builtins/pwd.h" #include "builtins/random.h" @@ -393,6 +394,7 @@ static constexpr builtin_data_t builtin_datas[] = { {L"math", &builtin_math, N_(L"Evaluate math expressions")}, {L"not", &builtin_generic, N_(L"Negate exit status of job")}, {L"or", &builtin_generic, N_(L"Execute command if previous command failed")}, + {L"path", &builtin_path, N_(L"Handle paths")}, {L"printf", &builtin_printf, N_(L"Prints formatted text")}, {L"pwd", &builtin_pwd, N_(L"Print the working directory")}, {L"random", &builtin_random, N_(L"Generate random number")}, diff --git a/src/builtins/path.cpp b/src/builtins/path.cpp new file mode 100644 index 000000000..12cf9441f --- /dev/null +++ b/src/builtins/path.cpp @@ -0,0 +1,907 @@ +// Implementation of the path builtin. +#include "config.h" // IWYU pragma: keep + +#include +#include +#include + +#include +#include +#include +#include + +#include "../builtin.h" +#include "../common.h" +#include "../fallback.h" // IWYU pragma: keep +#include "../io.h" +#include "../parser.h" +#include "../path.h" +#include "../util.h" +#include "../wcstringutil.h" +#include "../wgetopt.h" +#include "../wutil.h" // IWYU pragma: keep + +// How many bytes we read() at once. +// We use PATH_MAX here so we always get at least one path, +// and so we can automatically detect NULL-separated input. +#define PATH_CHUNK_SIZE PATH_MAX + +static void path_error(io_streams_t &streams, const wchar_t *fmt, ...) 
{ + streams.err.append(L"path "); + va_list va; + va_start(va, fmt); + streams.err.append_formatv(fmt, va); + va_end(va); +} + +static void path_unknown_option(parser_t &parser, io_streams_t &streams, const wchar_t *subcmd, + const wchar_t *opt) { + path_error(streams, BUILTIN_ERR_UNKNOWN, subcmd, opt); + builtin_print_error_trailer(parser, streams.err, L"path"); +} + +// We read from stdin if we are the second or later process in a pipeline. +static bool path_args_from_stdin(const io_streams_t &streams) { + return streams.stdin_is_directly_redirected; +} + +static const wchar_t *path_get_arg_argv(int *argidx, const wchar_t *const *argv) { + return argv && argv[*argidx] ? argv[(*argidx)++] : nullptr; +} + +// A helper type for extracting arguments from either argv or stdin. +namespace { +class arg_iterator_t { + // The list of arguments passed to this builtin. + const wchar_t *const *argv_; + // If using argv, index of the next argument to return. + int argidx_; + // If not using argv, a string to store bytes that have been read but not yet returned. + std::string buffer_; + // Whether we have found a char to split on yet, when reading from stdin. + // If explicitly passed, we will always split on NULL, + // if not we will split on NULL if the first PATH_MAX chunk includes one, + // or '\n' otherwise. + bool have_split_; + // The char we have decided to split on when reading from stdin. + char split_{'\0'}; + // Backing storage for the next() string. + wcstring storage_; + const io_streams_t &streams_; + + /// Reads the next argument from stdin, returning true if an argument was produced and false if + /// not. On true, the string is stored in storage_. + bool get_arg_stdin() { + assert(path_args_from_stdin(streams_) && "should not be reading from stdin"); + assert(streams_.stdin_fd >= 0 && "should have a valid fd"); + // Read in chunks from fd until buffer has a line (or the end if split_ is unset). 
+ size_t pos; + while (!have_split_ || (pos = buffer_.find(split_)) == std::string::npos) { + char buf[PATH_CHUNK_SIZE]; + long n = read_blocked(streams_.stdin_fd, buf, PATH_CHUNK_SIZE); + if (n == 0) { + // If we still have buffer contents, flush them, + // in case there was no trailing sep. + if (buffer_.empty()) return false; + storage_ = str2wcstring(buffer_); + buffer_.clear(); + return true; + } + if (n == -1) { + // Some error happened. We can't do anything about it, + // so ignore it. + // (read_blocked already retries for EAGAIN and EINTR) + storage_ = str2wcstring(buffer_); + buffer_.clear(); + return false; + } + buffer_.append(buf, n); + if (!have_split_) { + if (buffer_.find('\0') != std::string::npos) { + split_ = '\0'; + } else { + split_ = '\n'; + } + have_split_ = true; + } + } + + // Split the buffer on the sep and return the first part. + storage_ = str2wcstring(buffer_, pos); + buffer_.erase(0, pos + 1); + return true; + } + + public: + arg_iterator_t(const wchar_t *const *argv, int argidx, const io_streams_t &streams, bool split_null) + : argv_(argv), argidx_(argidx), have_split_(split_null), streams_(streams) {} + + const wcstring *nextstr() { + if (path_args_from_stdin(streams_)) { + return get_arg_stdin() ? 
&storage_ : nullptr; + } + if (auto arg = path_get_arg_argv(&argidx_, argv_)) { + storage_ = arg; + return &storage_; + } else { + return nullptr; + } + } +}; +} // namespace + +enum { + TYPE_BLOCK = 1 << 0, /// A block device + TYPE_DIR = 1 << 1, /// A directory + TYPE_FILE = 1 << 2, /// A regular file + TYPE_LINK = 1 << 3, /// A link + TYPE_CHAR = 1 << 4, /// A character device + TYPE_FIFO = 1 << 5, /// A fifo + TYPE_SOCK = 1 << 6, /// A socket +}; +typedef uint32_t path_type_flags_t; + +enum { + PERM_READ = 1 << 0, + PERM_WRITE = 1 << 1, + PERM_EXEC = 1 << 2, + PERM_SUID = 1 << 3, + PERM_SGID = 1 << 4, + PERM_USER = 1 << 5, + PERM_GROUP = 1 << 6, +}; +typedef uint32_t path_perm_flags_t; + +// This is used by the subcommands to communicate with the option parser which flags are +// valid and get the result of parsing the command for flags. +struct options_t { //!OCLINT(too many fields) + bool perm_valid = false; + bool type_valid = false; + bool invert_valid = false; + bool reverse_valid = false; + bool key_valid = false; + bool unique_valid = false; + bool unique = false; + bool have_key = false; + const wchar_t *key = nullptr; + + bool null_in = false; + bool null_out = false; + bool quiet = false; + + bool have_type = false; + path_type_flags_t type = 0; + + bool have_perm = false; + // Whether we need to check a special permission like suid. 
+ bool have_special_perm = false; + path_perm_flags_t perm = 0; + + bool invert = false; + bool reverse = false; + + const wchar_t *arg1 = nullptr; +}; + +static void path_out(io_streams_t &streams, const options_t &opts, const wcstring &str) { + if (!opts.quiet) { + if (!opts.null_out) { + streams.out.append_with_separation(str, + separation_type_t::explicitly); + } else { + streams.out.append(str); + streams.out.push_back(L'\0'); + } + } +} + +static int handle_flag_q(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + UNUSED(argv); + UNUSED(parser); + UNUSED(streams); + UNUSED(w); + opts->quiet = true; + return STATUS_CMD_OK; +} + +static int handle_flag_z(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + UNUSED(argv); + UNUSED(parser); + UNUSED(streams); + UNUSED(w); + opts->null_in = true; + return STATUS_CMD_OK; +} + +static int handle_flag_Z(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + UNUSED(argv); + UNUSED(parser); + UNUSED(streams); + UNUSED(w); + opts->null_out = true; + return STATUS_CMD_OK; +} + +static int handle_flag_t(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + if (opts->type_valid) { + if (!opts->have_type) opts->type = 0; + opts->have_type = true; + wcstring_list_t types = split_string_tok(w.woptarg, L","); + for (const auto &t : types) { + if (t == L"file") { + opts->type |= TYPE_FILE; + } else if (t == L"dir") { + opts->type |= TYPE_DIR; + } else if (t == L"block") { + opts->type |= TYPE_BLOCK; + } else if (t == L"char") { + opts->type |= TYPE_CHAR; + } else if (t == L"fifo") { + opts->type |= TYPE_FIFO; + } else if (t == L"socket") { + opts->type |= TYPE_SOCK; + } else if (t == L"link") { + opts->type |= TYPE_LINK; + } else { + path_error(streams, _(L"%ls: Invalid type '%ls'"), L"path", t.c_str()); + return 
STATUS_INVALID_ARGS; + } + } + return STATUS_CMD_OK; + } + path_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; +} + + +static int handle_flag_p(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + if (opts->perm_valid) { + if (!opts->have_perm) opts->perm = 0; + opts->have_perm = true; + wcstring_list_t perms = split_string_tok(w.woptarg, L","); + for (const auto &p : perms) { + if (p == L"read") { + opts->perm |= PERM_READ; + } else if (p == L"write") { + opts->perm |= PERM_WRITE; + } else if (p == L"exec") { + opts->perm |= PERM_EXEC; + } else if (p == L"suid") { + opts->perm |= PERM_SUID; + opts->have_special_perm = true; + } else if (p == L"sgid") { + opts->perm |= PERM_SGID; + opts->have_special_perm = true; + } else if (p == L"user") { + opts->perm |= PERM_USER; + opts->have_special_perm = true; + } else if (p == L"group") { + opts->perm |= PERM_GROUP; + opts->have_special_perm = true; + } else { + path_error(streams, _(L"%ls: Invalid permission '%ls'"), L"path", p.c_str()); + return STATUS_INVALID_ARGS; + } + } + return STATUS_CMD_OK; + } + path_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; +} + +static int handle_flag_perms(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts, path_perm_flags_t perm ) { + if (opts->perm_valid) { + if (!opts->have_perm) opts->perm = 0; + opts->have_perm = true; + opts->perm |= perm; + return STATUS_CMD_OK; + } + path_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; +} + +static int handle_flag_r(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + if (opts->reverse_valid) { + opts->reverse = true; + return STATUS_CMD_OK; + } else if (opts->perm_valid) { + return handle_flag_perms(argv, parser, streams, w, opts, PERM_READ); + } + 
path_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; +} + +static int handle_flag_w(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + return handle_flag_perms(argv, parser, streams, w, opts, PERM_WRITE); +} +static int handle_flag_x(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + return handle_flag_perms(argv, parser, streams, w, opts, PERM_EXEC); +} + +static int handle_flag_types(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts, path_type_flags_t type) { + if (opts->type_valid) { + if (!opts->have_type) opts->type = 0; + opts->have_type = true; + opts->type |= type; + return STATUS_CMD_OK; + } + path_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; +} + +static int handle_flag_f(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + return handle_flag_types(argv, parser, streams, w, opts, TYPE_FILE); +} +static int handle_flag_l(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + return handle_flag_types(argv, parser, streams, w, opts, TYPE_LINK); +} +static int handle_flag_d(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + return handle_flag_types(argv, parser, streams, w, opts, TYPE_DIR); +} + +static int handle_flag_v(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + if (opts->invert_valid) { + opts->invert = true; + return STATUS_CMD_OK; + } + path_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; +} + +static int handle_flag_u(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + if 
(opts->unique_valid) { + opts->unique = true; + return STATUS_CMD_OK; + } + path_unknown_option(parser, streams, argv[0], argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; +} + +static int handle_flag_key(const wchar_t **argv, parser_t &parser, io_streams_t &streams, + const wgetopter_t &w, options_t *opts) { + UNUSED(argv); + UNUSED(parser); + UNUSED(streams); + opts->have_key = true; + opts->key = w.woptarg; + return STATUS_CMD_OK; +} + +/// This constructs the wgetopt() short options string based on which arguments are valid for the +/// subcommand. We have to do this because many short flags have multiple meanings and may or may +/// not require an argument depending on the meaning. +static wcstring construct_short_opts(options_t *opts) { //!OCLINT(high npath complexity) + // All commands accept -z, -Z and -q + wcstring short_opts(L":zZq"); + if (opts->perm_valid) { + short_opts.append(L"p:"); + short_opts.append(L"rwx"); + } + if (opts->type_valid) { + short_opts.append(L"t:"); + short_opts.append(L"fld"); + } + if (opts->invert_valid) short_opts.append(L"v"); + if (opts->reverse_valid) short_opts.append(L"r"); + if (opts->unique_valid) short_opts.append(L"u"); + return short_opts; +} + +// Note that several long flags share the same short flag. That is okay. The caller is expected +// to indicate that a max of one of the long flags sharing a short flag is valid. 
+// Remember: adjust the completions in share/completions/ when options change +static const struct woption long_options[] = { + {L"quiet", no_argument, nullptr, 'q'}, + {L"null-in", no_argument, nullptr, 'z'}, + {L"null-out", no_argument, nullptr, 'Z'}, + {L"perm", required_argument, nullptr, 'p'}, + {L"type", required_argument, nullptr, 't'}, + {L"invert", no_argument, nullptr, 'v'}, + {L"reverse", no_argument, nullptr, 'r'}, + {L"unique", no_argument, nullptr, 'u'}, + {L"key", required_argument, nullptr, 1}, + {}}; + +static const std::unordered_map flag_to_function = { + {'q', handle_flag_q}, {'v', handle_flag_v}, + {'z', handle_flag_z}, {'Z', handle_flag_Z}, + {'t', handle_flag_t}, {'p', handle_flag_p}, + {'r', handle_flag_r}, {'w', handle_flag_w}, + {'x', handle_flag_x}, {'f', handle_flag_f}, + {'l', handle_flag_l}, {'d', handle_flag_d}, + {'l', handle_flag_l}, {'d', handle_flag_d}, + {'u', handle_flag_u}, {1, handle_flag_key}, +}; + +/// Parse the arguments for flags recognized by a specific string subcommand. 
+static int parse_opts(options_t *opts, int *optind, int n_req_args, int argc, const wchar_t **argv, + parser_t &parser, io_streams_t &streams) { + const wchar_t *cmd = argv[0]; + wcstring short_opts = construct_short_opts(opts); + const wchar_t *short_options = short_opts.c_str(); + int opt; + wgetopter_t w; + while ((opt = w.wgetopt_long(argc, argv, short_options, long_options, nullptr)) != -1) { + auto fn = flag_to_function.find(opt); + if (fn != flag_to_function.end()) { + int retval = fn->second(argv, parser, streams, w, opts); + if (retval != STATUS_CMD_OK) return retval; + } else if (opt == ':') { + streams.err.append(L"path "); + builtin_missing_argument(parser, streams, cmd, argv[w.woptind - 1], + false /* print_hints */); + return STATUS_INVALID_ARGS; + } else if (opt == '?') { + path_unknown_option(parser, streams, cmd, argv[w.woptind - 1]); + return STATUS_INVALID_ARGS; + } else { + DIE("unexpected retval from wgetopt_long"); + } + } + + *optind = w.woptind; + + if (n_req_args) { + assert(n_req_args == 1); + opts->arg1 = path_get_arg_argv(optind, argv); + if (!opts->arg1 && n_req_args == 1) { + path_error(streams, BUILTIN_ERR_ARG_COUNT0, cmd); + return STATUS_INVALID_ARGS; + } + } + + // At this point we should not have optional args and be reading args from stdin. + if (path_args_from_stdin(streams) && argc > *optind) { + path_error(streams, BUILTIN_ERR_TOO_MANY_ARGUMENTS, cmd); + return STATUS_INVALID_ARGS; + } + + return STATUS_CMD_OK; +} + +static int path_transform(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv, + wcstring (*func)(wcstring)) { + options_t opts; + int optind; + int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); + if (retval != STATUS_CMD_OK) return retval; + + int n_transformed = 0; + arg_iterator_t aiter(argv, optind, streams, opts.null_in); + while (const wcstring *arg = aiter.nextstr()) { + // Empty paths make no sense, but e.g. wbasename returns true for them. 
+ if (arg->empty()) continue; + wcstring transformed = func(*arg); + if (transformed != *arg) { + n_transformed++; + // Return okay if path wasn't already in this form + // TODO: Is that correct? + if (opts.quiet) return STATUS_CMD_OK; + } + path_out(streams, opts, transformed); + } + + return n_transformed > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; +} + + +static int path_basename(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + return path_transform(parser, streams, argc, argv, wbasename); +} + +static int path_dirname(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + return path_transform(parser, streams, argc, argv, wdirname); +} + +// Not a constref because this must have the same type as wdirname. +// cppcheck-suppress passedByValue +static wcstring normalize_helper(wcstring path) { + wcstring np = normalize_path(path, false); + if (!np.empty() && np[0] == L'-') { + np = L"./" + np; + } + return np; +} + +static bool filter_path(options_t opts, const wcstring &path) { + // TODO: Add moar stuff: + // fifos, sockets, size greater than zero, setuid, ... + // Nothing to check, file existence is checked elsewhere. 
+ if (!opts.have_type && !opts.have_perm) return true; + + if (opts.have_type) { + bool type_ok = false; + struct stat buf; + if (opts.type & TYPE_LINK) { + type_ok = !lwstat(path, &buf) && S_ISLNK(buf.st_mode); + } + + auto ret = !wstat(path, &buf); + if (!ret) { + // Does not exist + return false; + } + if (!type_ok && opts.type & TYPE_FILE && S_ISREG(buf.st_mode)) { + type_ok = true; + } + if (!type_ok && opts.type & TYPE_DIR && S_ISDIR(buf.st_mode)) { + type_ok = true; + } + if (!type_ok && opts.type & TYPE_BLOCK && S_ISBLK(buf.st_mode)) { + type_ok = true; + } + if (!type_ok && opts.type & TYPE_CHAR && S_ISCHR(buf.st_mode)) { + type_ok = true; + } + if (!type_ok && opts.type & TYPE_FIFO && S_ISFIFO(buf.st_mode)) { + type_ok = true; + } + if (!type_ok && opts.type & TYPE_SOCK && S_ISSOCK(buf.st_mode)) { + type_ok = true; + } + if (!type_ok) return false; + } + if (opts.have_perm) { + int amode = 0; + if (opts.perm & PERM_READ) amode |= R_OK; + if (opts.perm & PERM_WRITE) amode |= W_OK; + if (opts.perm & PERM_EXEC) amode |= X_OK; + // access returns 0 on success, + // -1 on failure. Yes, C can't even keep its bools straight. + if (waccess(path, amode)) return false; + + // Permissions that require special handling + if (opts.have_special_perm) { + struct stat buf; + auto ret = !wstat(path, &buf); + if (!ret) { + // Does not exist, WTF? + return false; + } + + if (opts.perm & PERM_SUID && !(S_ISUID & buf.st_mode)) return false; + if (opts.perm & PERM_SGID && !(S_ISGID & buf.st_mode)) return false; + if (opts.perm & PERM_USER && !(geteuid() == buf.st_uid)) return false; + if (opts.perm & PERM_GROUP && !(getegid() == buf.st_gid)) return false; + } + } + + // No filters failed. 
+ return true; +} + +static int path_normalize(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + return path_transform(parser, streams, argc, argv, normalize_helper); +} + +static maybe_t find_extension (const wcstring &path) { + // The extension belongs to the basename, + // if there is a "." before the last component it doesn't matter. + // e.g. ~/.config/fish/conf.d/foo + // does not have an extension! The ".d" here is not a file extension for "foo". + // And "~/.config" doesn't have an extension either - the ".config" is the filename. + wcstring filename = wbasename(path); + + // "." and ".." aren't really *files* and therefore don't have an extension. + if (filename == L"." || filename == L"..") return none(); + + // If we don't have a "." or the "." is the first in the filename, + // we do not have an extension + size_t pos = filename.find_last_of(L'.'); + if (pos == wcstring::npos || pos == 0) { + return none(); + } + + // Convert pos back to what it would be in the original path. + return pos + path.size() - filename.size(); +} + +static int path_extension(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + options_t opts; + int optind; + int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); + if (retval != STATUS_CMD_OK) return retval; + + int n_transformed = 0; + arg_iterator_t aiter(argv, optind, streams, opts.null_in); + while (const wcstring *arg = aiter.nextstr()) { + auto pos = find_extension(*arg); + + if (!pos) { + // If there is no extension the extension is empty. + // This is unambiguous because we include the ".". + path_out(streams, opts, L""); + continue; + } + + wcstring ext = arg->substr(*pos); + if (opts.quiet && !ext.empty()) { + return STATUS_CMD_OK; + } + path_out(streams, opts, ext); + n_transformed++; + } + + return n_transformed > 0 ? 
STATUS_CMD_OK : STATUS_CMD_ERROR; +} + +static int path_change_extension(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + options_t opts; + int optind; + int retval = parse_opts(&opts, &optind, 1, argc, argv, parser, streams); + if (retval != STATUS_CMD_OK) return retval; + + int n_transformed = 0; + arg_iterator_t aiter(argv, optind, streams, opts.null_in); + while (const wcstring *arg = aiter.nextstr()) { + auto pos = find_extension(*arg); + + wcstring ext; + if (!pos) { + ext = *arg; + } else { + ext = arg->substr(0, *pos); + } + + // Only add on the extension "." if we have something. + // That way specifying an empty extension strips it. + if (*opts.arg1) { + if (opts.arg1[0] != L'.') { + ext.push_back(L'.'); + } + ext.append(opts.arg1); + } + path_out(streams, opts, ext); + n_transformed++; + } + + return n_transformed > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; +} + +static int path_resolve(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + options_t opts; + int optind; + int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); + if (retval != STATUS_CMD_OK) return retval; + + int n_transformed = 0; + arg_iterator_t aiter(argv, optind, streams, opts.null_in); + while (const wcstring *arg = aiter.nextstr()) { + auto real = wrealpath(*arg); + + if (!real) { + // The path doesn't exist, isn't readable or a symlink loop. + // We go up until we find something that works. + wcstring next = *arg; + // First add $PWD if we're relative + if (!next.empty() && next[0] != L'/') { + // Note pwd can have symlinks, but we are about to resolve it anyway. 
+ next = path_apply_working_directory(*arg, parser.vars().get_pwd_slash()); + } + auto rest = wbasename(next); + while(!next.empty() && next != L"/") { + next = wdirname(next); + real = wrealpath(next); + if (real) { + real->push_back(L'/'); + real->append(rest); + real = normalize_path(*real, false); + break; + } + rest = wbasename(next) + L'/' + rest; + } + if (!real) { + continue; + } + } + + // Normalize the path so "../" components are eliminated even after + // nonexistent or non-directory components. + // Otherwise `path resolve foo/../` will be `$PWD/foo/../` if foo is a file. + real = normalize_path(*real, false); + + // Return 0 if we found a realpath. + if (opts.quiet) { + return STATUS_CMD_OK; + } + path_out(streams, opts, *real); + n_transformed++; + } + + return n_transformed > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; +} + +static int path_sort(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + options_t opts; + opts.reverse_valid = true; + opts.key_valid = true; + opts.unique_valid = true; + int optind; + int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); + if (retval != STATUS_CMD_OK) return retval; + + auto keyfunc = +[] (const wcstring &x) { + return wbasename(x); + }; + if (opts.have_key) { + if (std::wcscmp(opts.key, L"basename") == 0) { + // Do nothing, this is the default + } else if (std::wcscmp(opts.key, L"dirname") == 0) { + keyfunc = +[] (const wcstring &x) { + return wdirname(x); + }; + } else if (std::wcscmp(opts.key, L"path") == 0) { + // Act as if --key hadn't been given. + opts.have_key = false; + } else { + path_error(streams, _(L"%ls: Invalid sort key '%ls'\n"), argv[0], opts.key); + return STATUS_INVALID_ARGS; + } + } + + wcstring_list_t list; + arg_iterator_t aiter(argv, optind, streams, opts.null_in); + while (const wcstring *arg = aiter.nextstr()) { + list.push_back(*arg); + } + + if (opts.have_key) { + // Keep a map to avoid repeated keyfunc calls and to keep things alive. 
+ std::map key; + for (const auto &arg : list) { + key[arg] = keyfunc(arg); + } + + // We use a stable sort here, and also explicit < and >, + // to avoid changing the order so you can chain calls. + std::stable_sort(list.begin(), list.end(), + [&](const wcstring &a, const wcstring &b) { + if (!opts.reverse) + return (wcsfilecmp_glob(key[a].c_str(), key[b].c_str()) < 0); + else + return (wcsfilecmp_glob(key[a].c_str(), key[b].c_str()) > 0); + }); + if (opts.unique) { + list.erase(std::unique(list.begin(), list.end(), + [&](const wcstring &a, const wcstring &b) { + return key[a] == key[b]; + }), + list.end()); + } + } else { + // Without --key, we just sort by the entire path, + // so we have no need to transform and such. + std::stable_sort(list.begin(), list.end(), + [&](const wcstring &a, const wcstring &b) { + if (!opts.reverse) + return (wcsfilecmp_glob(a.c_str(), b.c_str()) < 0); + else + return (wcsfilecmp_glob(a.c_str(), b.c_str()) > 0); + }); + if (opts.unique) { + list.erase(std::unique(list.begin(), list.end()), list.end()); + } + } + + for (const auto &entry : list) { + path_out(streams, opts, entry); + } + + /* TODO: Return true only if already sorted? */ + return STATUS_CMD_OK; +} + +// All strings are taken to be filenames, and if they match the type/perms/etc (and exist!) +// they are passed along. +static int path_filter(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv, bool is_is) { + options_t opts; + opts.type_valid = true; + opts.perm_valid = true; + opts.invert_valid = true; + int optind; + int retval = parse_opts(&opts, &optind, 0, argc, argv, parser, streams); + if (retval != STATUS_CMD_OK) return retval; + // If we have been invoked as "path is", which is "path filter -q". 
+ if (is_is) opts.quiet = true; + + int n_transformed = 0; + arg_iterator_t aiter(argv, optind, streams, opts.null_in); + while (const wcstring *arg = aiter.nextstr()) { + if ((!opts.have_perm && !opts.have_type) || (filter_path(opts, *arg) != opts.invert)) { + // If we don't have filters, check if it exists. + if (!opts.have_type && !opts.have_perm) { + bool ok = !waccess(*arg, F_OK); + if (ok == opts.invert) continue; + } + + // We *know* this is a filename, + // and so if it starts with a `-` we *know* it is relative + // to $PWD. So we can add `./`. + if (!arg->empty() && arg->front() == L'-') { + wcstring out = L"./" + *arg; + path_out(streams, opts, out); + } else { + path_out(streams, opts, *arg); + } + n_transformed++; + if (opts.quiet) return STATUS_CMD_OK; + } + } + + return n_transformed > 0 ? STATUS_CMD_OK : STATUS_CMD_ERROR; +} + +static int path_filter(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + return path_filter(parser, streams, argc, argv, false /* is_is */); +} + +static int path_is(parser_t &parser, io_streams_t &streams, int argc, const wchar_t **argv) { + return path_filter(parser, streams, argc, argv, true /* is_is */); +} + +// Keep sorted alphabetically +static constexpr const struct path_subcommand { + const wchar_t *name; + int (*handler)(parser_t &, io_streams_t &, int argc, //!OCLINT(unused param) + const wchar_t **argv); //!OCLINT(unused param) +} path_subcommands[] = { + // TODO: Which operations do we want? + {L"basename", &path_basename}, + {L"change-extension", &path_change_extension}, + {L"dirname", &path_dirname}, + {L"extension", &path_extension}, + {L"filter", &path_filter}, + {L"is", &path_is}, + {L"normalize", &path_normalize}, + {L"resolve", &path_resolve}, + {L"sort", &path_sort}, +}; +ASSERT_SORTED_BY_NAME(path_subcommands); + +/// The path builtin, for handling paths. 
+maybe_t builtin_path(parser_t &parser, io_streams_t &streams, const wchar_t **argv) { + const wchar_t *cmd = argv[0]; + int argc = builtin_count_args(argv); + if (argc <= 1) { + streams.err.append_format(BUILTIN_ERR_MISSING_SUBCMD, cmd); + builtin_print_error_trailer(parser, streams.err, L"path"); + return STATUS_INVALID_ARGS; + } + + if (std::wcscmp(argv[1], L"-h") == 0 || std::wcscmp(argv[1], L"--help") == 0) { + builtin_print_help(parser, streams, L"path"); + return STATUS_CMD_OK; + } + + const wchar_t *subcmd_name = argv[1]; + const auto *subcmd = get_by_sorted_name(subcmd_name, path_subcommands); + if (!subcmd) { + streams.err.append_format(BUILTIN_ERR_INVALID_SUBCMD, cmd, subcmd_name); + builtin_print_error_trailer(parser, streams.err, L"path"); + return STATUS_INVALID_ARGS; + } + + if (argc >= 3 && (std::wcscmp(argv[2], L"-h") == 0 || std::wcscmp(argv[2], L"--help") == 0)) { + wcstring path_dash_subcommand = wcstring(argv[0]) + L"-" + subcmd_name; + builtin_print_help(parser, streams, path_dash_subcommand.c_str()); + return STATUS_CMD_OK; + } + argc--; + argv++; + return subcmd->handler(parser, streams, argc, argv); +} diff --git a/src/builtins/path.h b/src/builtins/path.h new file mode 100644 index 000000000..537234b83 --- /dev/null +++ b/src/builtins/path.h @@ -0,0 +1,10 @@ +#ifndef FISH_BUILTIN_PATH_H +#define FISH_BUILTIN_PATH_H + +#include +#include + +class parser_t; + +maybe_t builtin_path(parser_t &parser, io_streams_t &streams, const wchar_t **argv); +#endif diff --git a/src/io.h b/src/io.h index b1aa81558..9ed9e9ba6 100644 --- a/src/io.h +++ b/src/io.h @@ -368,10 +368,10 @@ class output_stream_t : noncopyable_t, nonmovable_t { /// \param want_newline this is true if the output item should be ended with a newline. 
This /// is only relevant if we are printing the output to a stream, virtual void append_with_separation(const wchar_t *s, size_t len, separation_type_t type, - bool want_newline); + bool want_newline = true); /// The following are all convenience overrides. - void append_with_separation(const wcstring &s, separation_type_t type, bool want_newline) { + void append_with_separation(const wcstring &s, separation_type_t type, bool want_newline = true) { append_with_separation(s.data(), s.size(), type, want_newline); } diff --git a/src/parse_execution.cpp b/src/parse_execution.cpp index 994247385..671c2998b 100644 --- a/src/parse_execution.cpp +++ b/src/parse_execution.cpp @@ -891,7 +891,7 @@ end_execution_reason_t parse_execution_context_t::populate_plain_process( function_exists(L"cd", *parser) ? process_type_t::function : process_type_t::builtin; } else { // Not implicit cd. - const globspec_t glob_behavior = (cmd == L"set" || cmd == L"count") ? nullglob : failglob; + const globspec_t glob_behavior = (cmd == L"set" || cmd == L"count" || cmd == L"path") ? nullglob : failglob; // Form the list of arguments. The command is the first argument, followed by any arguments // from expanding the command, followed by the argument nodes themselves. E.g. if the // command is '$gco foo' and $gco is git checkout. diff --git a/tests/checks/path.fish b/tests/checks/path.fish new file mode 100644 index 000000000..410e12d0b --- /dev/null +++ b/tests/checks/path.fish @@ -0,0 +1,200 @@ +#RUN: %fish %s +# The "path" builtin for dealing with paths + +# Extension - for figuring out the file extension of a given path. +path extension / +or echo None +# CHECK: +# CHECK: None + +# No extension +path extension /. 
+or echo Filename is just a dot, no extension +# CHECK: +# CHECK: Filename is just a dot, no extension + +# No extension - ".foo" is the filename +path extension /.foo +or echo None again +# CHECK: +# CHECK: None again + +path extension /foo +or echo None once more +# CHECK: +# CHECK: None once more +path extension /foo.txt +and echo Success +# CHECK: .txt +# CHECK: Success +path extension /foo.txt/bar +or echo Not even here +# CHECK: +# CHECK: Not even here +path extension . .. +or echo No extension +# CHECK: +# CHECK: No extension +path extension ./foo.mp4 +# CHECK: .mp4 +path extension ../banana +# CHECK: +# nothing, status 1 +echo $status +# CHECK: 1 +path extension ~/.config +# CHECK: +# nothing, status 1 +echo $status +# CHECK: 1 +path extension ~/.config.d +# CHECK: .d +path extension ~/.config. +echo $status +# status 0 +# CHECK: . +# CHECK: 0 + +path change-extension '' ./foo.mp4 +# CHECK: ./foo +path change-extension wmv ./foo.mp4 +# CHECK: ./foo.wmv +path change-extension .wmv ./foo.mp4 +# CHECK: ./foo.wmv +path change-extension '' ../banana +# CHECK: ../banana +# still status 0, because there was an argument +echo $status +# CHECK: 0 +path change-extension '' ~/.config +# CHECK: {{.*}}/.config +echo $status +# CHECK: 0 + +path basename ./foo.mp4 +# CHECK: foo.mp4 +path basename ../banana +# CHECK: banana +path basename /usr/bin/ +# CHECK: bin +path dirname ./foo.mp4 +# CHECK: . +path basename ../banana +# CHECK: banana +path basename /usr/bin/ +# CHECK: bin + +cd $TMPDIR +mkdir -p bin +touch bin/{bash,bssh,chsh,dash,fish,slsh,ssh,zsh} +ln -s $TMPDIR/bin/bash bin/sh + +chmod +x bin/* +# We need files from here on +path filter bin argagagji +# The (hopefully) nonexistent argagagji is filtered implicitly: +# CHECK: bin + +# With --invert, the existing bin is filtered +path filter --invert bin argagagji +# CHECK: argagagji + +# With --invert and a type, bin fails the type, +# and argagagji doesn't exist, so both are printed. 
+path filter -vf bin argagagji +# CHECK: bin +# CHECK: argagagji + +path filter --type file bin bin/fish +# Only fish is a file +# CHECK: bin/fish +chmod 500 bin/fish +path filter --type file,dir --perm exec,write bin/fish . +# fish is a file, which passes, and executable, which passes, +# but not writable, which fails. +# +# . is a directory and both writable and executable, typically. +# So it passes. +# CHECK: . + +path normalize /usr/bin//../../etc/fish +# The "//" is squashed and the ".." components neutralize the components before +# CHECK: /etc/fish +path normalize /bin//bash +# The "//" is squashed, but /bin isn't resolved even if your system links it to /usr/bin. +# CHECK: /bin/bash + +# Paths with "-" get a "./": +path normalize -- -/foo -foo/foo +# CHECK: ./-/foo +# CHECK: ./-foo/foo +path normalize -- ../-foo +# CHECK: ../-foo + +# This goes for filter as well +touch -- -foo +path filter -f -- -foo +# CHECK: ./-foo + +# We need to remove the rest of the path because we have no idea what its value looks like. +path resolve bin//sh | string match -r -- 'bin/bash$' +# The "//" is squashed, and the symlink is resolved. +# sh here is bash +# CHECK: bin/bash + +# "../" cancels out even files. +path resolve bin//sh/../ | string match -r -- 'bin$' +# CHECK: bin + +# `path resolve` with nonexistent paths +set -l path (path resolve foo/bar) +string match -rq "^"(pwd -P | string escape --style=regex)'/' -- $path +and echo It matches pwd! +or echo pwd is \'$PWD\' resolved path is \'$path\' +# CHECK: It matches pwd! 
+string replace -r "^"(pwd -P | string escape --style=regex)'/' "" -- $path +# CHECK: foo/bar + +path resolve /banana//terracota/terracota/booooo/../pie +# CHECK: /banana/terracota/terracota/pie + +path sort --key=basename {def,abc}/{456,123,789,abc,def,0} | path sort --key=dirname -r +# CHECK: def/0 +# CHECK: def/123 +# CHECK: def/456 +# CHECK: def/789 +# CHECK: def/abc +# CHECK: def/def +# CHECK: abc/0 +# CHECK: abc/123 +# CHECK: abc/456 +# CHECK: abc/789 +# CHECK: abc/abc +# CHECK: abc/def + +path sort --unique --key=basename {def,abc}/{456,123,789} def/{abc,def,0} abc/{foo,bar,baz} +# CHECK: def/0 +# CHECK: def/123 +# CHECK: def/456 +# CHECK: def/789 +# CHECK: def/abc +# CHECK: abc/bar +# CHECK: abc/baz +# CHECK: def/def +# CHECK: abc/foo + + + +# Symlink loop. +# It goes brrr. +ln -s target link +ln -s link target + +test (path resolve target) = (pwd -P)/target +and echo target resolves to target +# CHECK: target resolves to target + +test (path resolve link) = (pwd -P)/link +and echo link resolves to link +# CHECK: link resolves to link +