diff --git a/Makefile.in b/Makefile.in
index 45f922d66..748dfa85b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -49,15 +49,28 @@ sysconfdir = @sysconfdir@
 docdir = @docdir@
 localedir = @localedir@
 
+#
+# pcre2
+#
+
+PCRE2_WIDTH = @WCHAR_T_BITS@
+PCRE2_DIR = pcre2-10.20
+PCRE2_CXXFLAGS = -I$(PCRE2_DIR)/src
+PCRE2_LIBDIR = $(PCRE2_DIR)/.libs
+PCRE2_LIB = $(PCRE2_LIBDIR)/libpcre2-$(PCRE2_WIDTH).a
+PCRE2_CONFIG = --disable-pcre2-8 --enable-pcre2-$(PCRE2_WIDTH) --disable-shared
+
 #
 # Various flags
 #
 
 MACROS = -DLOCALEDIR=\"$(localedir)\" -DPREFIX=L\"$(prefix)\" -DDATADIR=L\"$(datadir)\" -DSYSCONFDIR=L\"$(sysconfdir)\" -DBINDIR=L\"$(bindir)\" -DDOCDIR=L\"$(docdir)\"
-CXXFLAGS = @CXXFLAGS@ -iquote. -iquote./src/ $(MACROS) $(EXTRA_CXXFLAGS)
+CXXFLAGS = @CXXFLAGS@ -iquote. -iquote./src/ $(MACROS) $(PCRE2_CXXFLAGS) $(EXTRA_CXXFLAGS)
 CPPFLAGS = @CPPFLAGS@
 LDFLAGS = @LDFLAGS@
-LIBS = @LIBS@
+# pcre2 is linked statically from the bundled tree; its location is derived from PCRE2_DIR above
+LIBS_PCRE2 = -L$(PCRE2_LIBDIR) -lpcre2-$(PCRE2_WIDTH)
+LIBS = @LIBS@ $(LIBS_PCRE2)
 LDFLAGS_FISH = ${LDFLAGS} @LDFLAGS_FISH@
 
 #
@@ -96,7 +109,7 @@ FISH_INDENT_OBJS := obj/fish_indent.o obj/print_help.o $(FISH_OBJS)
 BUILTIN_FILES := src/builtin_set.cpp src/builtin_commandline.cpp	\
 	src/builtin_ulimit.cpp src/builtin_complete.cpp	\
 	src/builtin_jobs.cpp src/builtin_set_color.cpp	\
-	src/builtin_printf.cpp
+	src/builtin_printf.cpp src/builtin_string.cpp
 
 
 #
@@ -778,9 +791,11 @@ obj:
 # Build the fish program.
 #
 
-fish: $(FISH_OBJS) obj/fish.o
+fish: $(FISH_OBJS) obj/fish.o $(PCRE2_LIB)
 	$(CXX) $(CXXFLAGS) $(LDFLAGS_FISH) $(FISH_OBJS) obj/fish.o $(LIBS) -o $@
 
+$(PCRE2_LIB): # configure and build the bundled static pcre2 for the wchar_t code-unit width
+	(cd $(PCRE2_DIR) && ./configure $(PCRE2_CONFIG) && $(MAKE))
 
 #
 # Build the fish_tests program.
@@ -828,13 +843,6 @@ depend:
 	./config.status
 .PHONY: depend
 
-# Include What You Use
-iwyu:
-	# Requires the --keep-going flag as it always returns 1
-	# Can't set MAKEFLAGS on a target-specific basic
-	$(MAKE) -k _iwyu CXX=include-what-you-use
-_iwyu: clean $(PROGRAMS)
-.PHONY: iwyu _iwyu
 
 #
 # Cleanup targets
@@ -889,6 +897,7 @@ obj/builtin.o: src/wcstringutil.h src/builtin_set.cpp src/util.h
 obj/builtin.o: src/builtin_commandline.cpp src/builtin_complete.cpp
 obj/builtin.o: src/builtin_ulimit.cpp src/builtin_jobs.cpp
 obj/builtin.o: src/builtin_set_color.cpp src/output.h src/builtin_printf.cpp
+obj/builtin.o: src/builtin_string.cpp
 obj/builtin_test.o: config.h src/common.h src/fallback.h src/signal.h
 obj/builtin_test.o: src/builtin.h src/io.h src/wutil.h src/proc.h
 obj/builtin_test.o: src/parse_tree.h src/tokenizer.h src/parse_constants.h
@@ -944,8 +953,8 @@ obj/fish_tests.o: src/builtin.h src/function.h src/event.h src/autoload.h
 obj/fish_tests.o: src/lru.h src/wutil.h src/expand.h src/parser.h
 obj/fish_tests.o: src/output.h src/exec.h src/path.h src/history.h
 obj/fish_tests.o: src/iothread.h src/postfork.h src/parse_util.h src/pager.h
-obj/fish_tests.o: src/screen.h src/input.h src/input_common.h src/utf8.h
-obj/fish_tests.o: src/env_universal_common.h src/wcstringutil.h
+obj/fish_tests.o: src/screen.h src/input.h src/input_common.h src/wildcard.h
+obj/fish_tests.o: src/utf8.h src/env_universal_common.h src/wcstringutil.h
 obj/fish_version.o: src/fish_version.h
 obj/function.o: config.h src/wutil.h src/common.h src/fallback.h src/signal.h
 obj/function.o: src/autoload.h src/lru.h src/function.h src/event.h src/env.h
@@ -963,15 +972,15 @@ obj/history.o: src/io.h src/common.h src/complete.h src/highlight.h src/env.h
 obj/history.o: src/color.h src/parse_constants.h src/parse_tree.h
 obj/history.o: src/tokenizer.h src/wutil.h src/history.h src/path.h
 obj/history.o: src/iothread.h src/lru.h
+obj/input_common.o: config.h src/fallback.h src/signal.h src/util.h
+obj/input_common.o: src/common.h src/input_common.h
+obj/input_common.o: src/env_universal_common.h src/wutil.h src/env.h
+obj/input_common.o: src/iothread.h
 obj/input.o: config.h src/fallback.h src/signal.h src/wutil.h src/common.h
 obj/input.o: src/reader.h src/io.h src/complete.h src/highlight.h src/env.h
 obj/input.o: src/color.h src/parse_constants.h src/proc.h src/parse_tree.h
 obj/input.o: src/tokenizer.h src/input_common.h src/input.h src/parser.h
 obj/input.o: src/event.h src/output.h
-obj/input_common.o: config.h src/fallback.h src/signal.h src/util.h
-obj/input_common.o: src/common.h src/input_common.h
-obj/input_common.o: src/env_universal_common.h src/wutil.h src/env.h
-obj/input_common.o: src/iothread.h
 obj/intern.o: config.h src/fallback.h src/signal.h src/common.h src/intern.h
 obj/io.o: config.h src/fallback.h src/signal.h src/wutil.h src/common.h
 obj/io.o: src/exec.h src/io.h
@@ -998,14 +1007,6 @@ obj/parse_execution.o: src/builtin.h src/exec.h
 obj/parse_productions.o: src/parse_productions.h src/common.h config.h
 obj/parse_productions.o: src/fallback.h src/signal.h src/parse_constants.h
 obj/parse_productions.o: src/parse_tree.h src/tokenizer.h
-obj/parse_tree.o: src/common.h config.h src/fallback.h src/signal.h
-obj/parse_tree.o: src/parse_constants.h src/parse_productions.h
-obj/parse_tree.o: src/parse_tree.h src/tokenizer.h src/wutil.h src/proc.h
-obj/parse_tree.o: src/io.h
-obj/parse_util.o: config.h src/fallback.h src/signal.h src/util.h src/wutil.h
-obj/parse_util.o: src/common.h src/tokenizer.h src/parse_util.h
-obj/parse_util.o: src/parse_constants.h src/expand.h src/env.h src/wildcard.h
-obj/parse_util.o: src/complete.h src/parse_tree.h src/builtin.h src/io.h
 obj/parser.o: config.h src/fallback.h src/signal.h src/common.h src/wutil.h
 obj/parser.o: src/proc.h src/io.h src/parse_tree.h src/tokenizer.h
 obj/parser.o: src/parse_constants.h src/parser.h src/event.h src/function.h
@@ -1014,6 +1015,14 @@ obj/parser.o: src/highlight.h src/color.h src/sanity.h src/intern.h
 obj/parser.o: src/parse_util.h src/parse_execution.h
 obj/parser_keywords.o: config.h src/fallback.h src/signal.h src/common.h
 obj/parser_keywords.o: src/parser_keywords.h
+obj/parse_tree.o: src/common.h config.h src/fallback.h src/signal.h
+obj/parse_tree.o: src/parse_constants.h src/parse_productions.h
+obj/parse_tree.o: src/parse_tree.h src/tokenizer.h src/wutil.h src/proc.h
+obj/parse_tree.o: src/io.h
+obj/parse_util.o: config.h src/fallback.h src/signal.h src/util.h src/wutil.h
+obj/parse_util.o: src/common.h src/tokenizer.h src/parse_util.h
+obj/parse_util.o: src/parse_constants.h src/expand.h src/env.h src/wildcard.h
+obj/parse_util.o: src/complete.h src/parse_tree.h src/builtin.h src/io.h
 obj/path.o: config.h src/fallback.h src/signal.h src/common.h src/env.h
 obj/path.o: src/wutil.h src/path.h src/expand.h src/parse_constants.h
 obj/postfork.o: src/signal.h src/common.h config.h src/fallback.h src/proc.h
diff --git a/configure.ac b/configure.ac
index 52bc00f1c..f9b446612 100644
--- a/configure.ac
+++ b/configure.ac
@@ -24,6 +24,7 @@ conf_arg=$@
 AC_SUBST(HAVE_GETTEXT)
 AC_SUBST(HAVE_DOXYGEN)
 AC_SUBST(LDFLAGS_FISH)
+AC_SUBST(WCHAR_T_BITS)
 
 
 #
@@ -375,17 +376,16 @@ if test x$local_gettext != xno; then
   AC_CHECK_HEADERS([libintl.h])
 fi
 
-AC_CHECK_HEADER(
-  [regex.h],
-  [
-    AC_DEFINE(
-      [HAVE_REGEX_H],
-      [1],
-      [Define to 1 if you have the <regex.h> header file.]
-    )
-  ],
-  [AC_MSG_ERROR([Could not find the header regex.h, needed to build fish])]
-)
+
+#
+# Get the size in bits of wchar_t, needed for configuring the pcre2 build
+# and for code that #includes pcre2.h
+#
+
+AC_CHECK_SIZEOF(wchar_t)
+WCHAR_T_BITS=`expr 8 \* $ac_cv_sizeof_wchar_t`
+AC_DEFINE_UNQUOTED([WCHAR_T_BITS], [$WCHAR_T_BITS], [The size of wchar_t in bits.])
+
 
 #
 # On some platforms (Solaris 10) adding -std=c99 in turn requires that
@@ -785,6 +785,7 @@ else
   AC_MSG_RESULT(no)
 fi
 
+
 # Check for Solaris curses tputs having fixed length parameter list.
 AC_MSG_CHECKING([if we are using non varargs tparm.])
 AC_COMPILE_IFELSE(
diff --git a/doc_src/string.txt b/doc_src/string.txt
new file mode 100644
index 000000000..5d8dcaf08
--- /dev/null
+++ b/doc_src/string.txt
@@ -0,0 +1,200 @@
+\section string string - manipulate strings
+
+\subsection string-synopsis Synopsis
+\fish{synopsis}
+string length [(-q | --quiet)] [STRING...]
+string sub [(-s | --start) START] [(-l | --length) LENGTH]
+           [(-q | --quiet)] [STRING...]
+string split [(-m | --max) MAX] [(-r | --right)] [(-q | --quiet)]
+             SEP [STRING...]
+string join [(-q | --quiet)] SEP [STRING...]
+string trim [(-l | --left)] [(-r | --right)] [(-c | --chars) CHARS]
+            [(-q | --quiet)] [STRING...]
+string escape [(-n | --no-quoted)] [STRING...]
+string match [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)]
+             [(-n | --index)] [(-q | --quiet)] PATTERN [STRING...]
+string replace [(-a | --all)] [(-i | --ignore-case)] [(-r | --regex)]
+               [(-q | --quiet)] PATTERN REPLACEMENT [STRING...]
+\endfish
+
+
+\subsection string-description Description
+
+`string` performs operations on strings.
+
+STRING arguments are taken from the command line unless standard input is connected to a pipe or a file, in which case they are read from standard input. It is an error to supply STRING arguments on the command line and on standard input.
+
+Arguments beginning with `-` are normally interpreted as switches; `--` causes the following arguments not to be treated as switches even if they begin with `-`. Switches and required arguments are recognized only on the command line.
+
+Most subcommands accept a `-q` or `--quiet` switch, which suppresses the usual output but exits with the documented status.
+
+The following subcommands are available:
+
+- `length` reports the length of each string argument in characters. Exit status: 0 if at least one non-empty STRING was given, or 1 otherwise.
+
+- `sub` prints a substring of each string argument. The start of the substring can be specified with `-s` or `--start` followed by a 1-based index value. Positive index values are relative to the start of the string and negative index values are relative to the end of the string. The default start value is 1. The length of the substring can be specified with `-l` or `--length`. If the length is not specified, the substring continues to the end of each STRING. Exit status: 0 if at least one substring operation was performed, 1 otherwise.
+
+- `split` splits each STRING on the separator SEP, which can be an empty string. If `-m` or `--max` is specified, at most MAX splits are done. If `-r` or `--right` is given, splitting is performed right-to-left. This is useful in combination with `-m` or `--max`. Exit status: 0 if at least one split was performed, or 1 otherwise.
+
+- `join` joins its STRING arguments into a single string separated by SEP, which can be an empty string. Exit status: 0 if at least one join was performed, or 1 otherwise.
+
+- `trim` removes leading and trailing whitespace from each STRING. If `-l` or `--left` is given, only leading whitespace is removed. If `-r` or `--right` is given, only trailing whitespace is trimmed. The `-c` or `--chars` switch causes the characters in CHARS to be removed instead of whitespace. Exit status: 0 if at least one character was trimmed, or 1 otherwise.
+
+- `escape` escapes each STRING such that it can be passed back to `eval` to produce the original argument again. By default, all special characters are escaped, and quotes are used to simplify the output when possible. If `-n` or `--no-quoted` is given, the simplifying quoted format is not used. Exit status: 0 if at least one string was escaped, or 1 otherwise.
+
+- `match` tests each STRING against a pattern and prints matching substrings. Only the first match is printed unless `-a` or `--all` is given, in which case all matches are reported. Matching can be made case-insensitive with `-i` or `--ignore-case`. If `-n` or `--index` is given, each match is reported as a 1-based start position, or 0 for no match. By default, PATTERN is interpreted as a glob pattern matched against each entire string argument. If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression. Note that for a regular expression containing capturing groups, multiple items will be reported for each match, one for the entire match and one for each capturing group. Exit status: 0 if at least one match was found, or 1 otherwise.
+
+- `replace` is similar to `match` but replaces non-overlapping matching substrings with a replacement string and prints the result. By default, PATTERN is treated as a literal substring to be matched by the literal string REPLACEMENT. If `-r` or `--regex` is given, PATTERN is interpreted as a Perl-compatible regular expression, and REPLACEMENT can refer to capturing groups by number or name as `$n` or `${n}`. Exit status: 0 if at least one replacement was performed, or 1 otherwise.
+
+
+\subsection string-example Examples
+
+\fish
+string length 'hello, world'
+# Output:
+# 12
+
+string length -q $str
+# Equivalent to test -n $str
+\endfish
+
+\fish
+string sub --length 2 abcde
+# Output:
+# ab
+
+string sub -s 2 -l 2 abcde
+# Output:
+# bc
+
+string sub --start=-2 abcde
+# Output:
+# de
+\endfish
+
+\fish
+string split . example.com
+# Output:
+# example
+# com
+
+string split -r -m1 / /usr/local/bin/fish
+# Output:
+# /usr/local/bin
+# fish
+
+string split '' abc
+# Output:
+# a
+# b
+# c
+\endfish
+
+\fish
+seq 3 | string join ...
+# Output:
+# 1...2...3
+\endfish
+
+\fish
+string trim ' abc  '
+# Output:
+# abc
+
+string trim --right --chars=yz xyzzy zany
+# Output:
+# x
+# zan
+\endfish
+
+\fish
+echo \\x07 | string escape
+# Output:
+# \\cg
+\endfish
+
+\fish
+# string match glob examples
+
+string match '?' a
+# Output:
+# a
+
+string match 'a*b' axxb
+# Output:
+# axxb
+
+string match -i 'a??B' Axxb
+# Output:
+# Axxb
+
+string match -a -i '[aeiou]' A B C D E
+# Output:
+# A
+# E
+
+string match '[^fb]*' foo bar baz qux
+# Output:
+# qux
+
+echo 'ok?' | string match '*\\?'
+# Output:
+# ok?
+
+# string match regex examples
+
+string match -r 'cat|dog|fish' 'nice dog'
+# Output:
+# dog
+
+string match -r '(\\d\\d?):(\\d\\d):(\\d\\d)' 2:34:56
+# Output:
+# 2:34:56
+# 2
+# 34
+# 56
+
+string match -r '^(\\w{2,4})\\g1$' papa mud murmur
+# Output:
+# papa
+# pa
+# murmur
+# mur
+
+string match -r -n at catch
+# Output:
+# 2
+
+string match -r -i '0x[0-9a-f]{1,8}' 'int xyzzy = 0xBadC0de;'
+# Output:
+# 0xBadC0de
+\endfish
+
+\fish
+
+# string replace literal examples
+
+string replace is was 'blue is my favorite'
+# Output:
+# blue was my favorite
+
+string replace 3rd last 1st 2nd 3rd
+# Output:
+# 1st
+# 2nd
+# last
+
+string replace -a ' ' _ 'spaces to underscores'
+# Output:
+# spaces_to_underscores
+
+# string replace regex examples
+
+string replace -r -a '[^\\d.]+' ' ' '0 one two 3.14 four 5x'
+# Output:
+# 0 3.14 5
+
+string replace -r '(\\w+)\\s+(\\w+)' '$1 $2 $1' 'left  right'
+# Output:
+# left right left
+\endfish
diff --git a/src/builtin.cpp b/src/builtin.cpp
index 039dc7bc6..b1c15a96d 100644
--- a/src/builtin.cpp
+++ b/src/builtin.cpp
@@ -399,6 +399,7 @@ static void builtin_missing_argument(parser_t &parser, const wchar_t *cmd, const
 #include "builtin_jobs.cpp"
 #include "builtin_set_color.cpp"
 #include "builtin_printf.cpp"
+#include "builtin_string.cpp"
 
 /* builtin_test lives in builtin_test.cpp */
 int builtin_test(parser_t &parser, wchar_t **argv);
@@ -4123,6 +4124,7 @@ static const builtin_data_t builtin_datas[]=
     { 		L"set_color",  &builtin_set_color, N_(L"Set the terminal color")   },
     { 		L"source",  &builtin_source, N_(L"Evaluate contents of file")   },
     { 		L"status",  &builtin_status, N_(L"Return status information about fish")  },
+    { 		L"string",  &builtin_string, N_(L"Manipulate strings")  },
     { 		L"switch",  &builtin_generic, N_(L"Conditionally execute a block of commands")   },
     { 		L"test",  &builtin_test, N_(L"Test a condition")   },
     { 		L"true",  &builtin_true, N_(L"Return a successful result") },
diff --git a/src/builtin_string.cpp b/src/builtin_string.cpp
new file mode 100644
index 000000000..cd3f36a07
--- /dev/null
+++ b/src/builtin_string.cpp
@@ -0,0 +1,1438 @@
+/** \file builtin_string.cpp
+  Implementation of the string builtin.
+*/
+
+#define PCRE2_CODE_UNIT_WIDTH WCHAR_T_BITS
+#ifdef _WIN32
+#define PCRE2_STATIC
+#endif
+#include "pcre2.h"
+
+#define MAX_REPLACE_SIZE size_t(1048576)  // pcre2_substitute maximum output size in wchar_t
+
+enum
+{
+    BUILTIN_STRING_OK = STATUS_BUILTIN_OK,
+    BUILTIN_STRING_ERROR = STATUS_BUILTIN_ERROR
+};
+// Format a printf-style message and append it to stderr_buffer; it reports only and does not abort.
+static void string_fatal_error(const wchar_t *fmt, ...)
+{
+    va_list va;
+    va_start(va, fmt);
+    wcstring errstr = vformat_string(fmt, va);
+    va_end(va);
+    // Make sure a non-empty message ends with a newline
+    if (!errstr.empty() && errstr.at(errstr.length() - 1) != L'\n')
+    {
+        errstr += L'\n';
+    }
+
+    stderr_buffer += errstr;
+}
+// Read one line from builtin_stdin as a wide string (newline stripped); returns 0 once input is exhausted.
+static const wchar_t *string_get_arg_stdin()
+{
+    static wcstring arg;
+    // The result points into this static buffer, so it is only valid until the next call
+    arg.clear();
+
+    bool eof = false;
+    bool gotarg = false;
+    // Each outer iteration decodes one wide character, feeding mbrtowc a byte at a time
+    for (;;)
+    {
+        wchar_t wch = L'\0';
+        mbstate_t state = {};
+        for (;;)
+        {
+            char ch = '\0';
+            if (read_blocked(builtin_stdin, &ch, 1) <= 0)
+            {
+                eof = true;
+                break;
+            }
+            else
+            {
+                size_t n = mbrtowc(&wch, &ch, 1, &state);
+                if (n == size_t(-1))
+                {
+                    // Invalid multibyte sequence: start over
+                    memset(&state, 0, sizeof(state));
+                }
+                else if (n == size_t(-2))
+                {
+                    // Incomplete sequence: continue reading
+                }
+                else
+                {
+                    // Got a complete char (could be L'\0')
+                    break;
+                }
+            }
+        }
+
+        if (eof)
+        {
+            break;
+        }
+
+        if (wch == L'\n')
+        {
+            gotarg = true;
+            break;
+        }
+
+        arg += wch;
+    }
+    // A last line without a trailing newline is still an argument; only EOF with nothing read ends input
+    return (gotarg || !arg.empty()) ? arg.c_str() : 0;
+}
+// Return argv[*argidx] and advance *argidx; returns 0 when argv is null or the current entry is null.
+static const wchar_t *string_get_arg_argv(int *argidx, wchar_t **argv)
+{
+    return (argv && argv[*argidx]) ? argv[(*argidx)++] : 0;
+}
+// Fetch the next STRING argument: from standard input when it is not a terminal, else from argv.
+static inline const wchar_t *string_get_arg(int *argidx, wchar_t **argv)
+{
+    // A terminal on stdin means nothing was piped or redirected in, so use the command line
+    const bool from_argv = isatty(builtin_stdin) != 0;
+    if (!from_argv)
+    {
+        return string_get_arg_stdin();
+    }
+
+    return string_get_arg_argv(argidx, argv);
+}
+// `string escape`: print each STRING escaped via escape(); returns 0 if any string was escaped, else 1.
+static int string_escape(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"n";
+    const struct woption long_options[] =
+    {
+        { L"no-quoted", no_argument, 0, 'n' },
+        { 0, 0, 0, 0 }
+    };
+
+    escape_flags_t flags = ESCAPE_ALL;
+    wgetopter_t w;
+    for (;;)
+    {
+        int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+        if (c == -1)
+        {
+            break;
+        }
+        switch (c)
+        {
+            case 0:
+                break;
+
+            case 'n':
+                flags |= ESCAPE_NO_QUOTED;
+                break;
+
+            case '?':
+                builtin_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+        }
+    }
+    // STRING arguments may come from the command line or from stdin, but not both
+    int i = w.woptind;
+    if (!isatty(builtin_stdin) && argc > i)
+    {
+        string_fatal_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+    // Emit one escaped line per argument, counting them for the exit status
+    int nesc = 0;
+    const wchar_t *arg;
+    while ((arg = string_get_arg(&i, argv)) != 0)
+    {
+        stdout_buffer += escape(arg, flags);
+        stdout_buffer += L'\n';
+        nesc++;
+    }
+
+    return (nesc > 0) ? 0 : 1;
+}
+// `string join`: print the STRINGs joined by SEP on one line; returns 0 if more than one STRING was seen.
+static int string_join(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"q";
+    const struct woption long_options[] =
+    {
+        { L"quiet", no_argument, 0, 'q'},
+        { 0, 0, 0, 0 }
+    };
+
+    bool quiet = false;
+    wgetopter_t w;
+    for (;;)
+    {
+        int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+        if (c == -1)
+        {
+            break;
+        }
+        switch (c)
+        {
+            case 0:
+                break;
+
+            case 'q':
+                quiet = true;
+                break;
+
+            case '?':
+                builtin_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+        }
+    }
+    // SEP is required and is always taken from the command line, never from stdin
+    int i = w.woptind;
+    const wchar_t *sep;
+    if ((sep = string_get_arg_argv(&i, argv)) == 0)
+    {
+        string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+    // STRING arguments may come from the command line or from stdin, but not both
+    if (!isatty(builtin_stdin) && argc > i)
+    {
+        string_fatal_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+    // Append "arg sep" for every argument, then trim the one trailing separator below
+    int nargs = 0;
+    const wchar_t *arg;
+    while ((arg = string_get_arg(&i, argv)) != 0)
+    {
+        if (!quiet)
+        {
+            stdout_buffer += arg;
+            stdout_buffer += sep;
+        }
+        nargs++;
+    }
+    if (nargs > 0 && !quiet)
+    {
+        stdout_buffer.resize(stdout_buffer.length() - wcslen(sep));
+        stdout_buffer += L'\n';
+    }
+    // A join only counts as performed when at least two strings were combined
+    return (nargs > 1) ? 0 : 1;
+}
+// `string length`: print the length of each STRING; returns 0 if at least one STRING was non-empty.
+static int string_length(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"q";
+    const struct woption long_options[] =
+    {
+        { L"quiet", no_argument, 0, 'q'},
+        { 0, 0, 0, 0 }
+    };
+
+    bool quiet = false;
+    wgetopter_t w;
+    for (;;)
+    {
+        int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+        if (c == -1)
+        {
+            break;
+        }
+        switch (c)
+        {
+            case 0:
+                break;
+
+            case 'q':
+                quiet = true;
+                break;
+
+            case '?':
+                builtin_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+        }
+    }
+    // STRING arguments may come from the command line or from stdin, but not both
+    int i = w.woptind;
+    if (!isatty(builtin_stdin) && argc > i)
+    {
+        string_fatal_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+    // Lengths are printed unless quiet; the exit status depends only on seeing a non-empty string
+    const wchar_t *arg;
+    int nnonempty = 0;
+    while ((arg = string_get_arg(&i, argv)) != 0)
+    {
+        size_t n = wcslen(arg);
+        if (n > 0)
+        {
+            nnonempty++;
+        }
+        if (!quiet)
+        {
+            stdout_buffer += to_string(int(n));
+            stdout_buffer += L'\n';
+        }
+    }
+
+    return (nnonempty > 0) ? 0 : 1;
+}
+// Switches shared by the `string match` matchers; every flag defaults to false.
+struct match_options_t
+{
+    bool all;         // -a / --all: report every match instead of only the first
+    bool ignore_case; // -i / --ignore-case
+    bool index;       // -n / --index: report positions instead of matched text
+    bool quiet;       // -q / --quiet: suppress output
+
+    match_options_t(): all(false), ignore_case(false), index(false), quiet(false) { }
+};
+// Abstract base for `string match` back ends: holds the options, the command name, and a match counter.
+class string_matcher_t
+{
+protected:
+    match_options_t opts;
+    const wchar_t *argv0; // command name used as the prefix of error messages
+    int nmatch;           // number of matches counted so far across all arguments
+
+public:
+    string_matcher_t(const wchar_t *argv0_, const match_options_t &opts_)
+        : opts(opts_), argv0(argv0_), nmatch(0)
+    { }
+
+    virtual ~string_matcher_t() { }
+    virtual bool report_matches(const wchar_t *arg) = 0; // test one argument and report its matches
+    int match_count() { return nmatch; }
+};
+// string_matcher_t back end for glob patterns: supports ?, *, [...] classes and backslash escapes.
+class wildcard_matcher_t: public string_matcher_t
+{
+    const wchar_t *pattern;
+    // Return true if the whole of arg matches the whole of pat; recurses to backtrack over '*'
+    bool arg_matches(const wchar_t *pat, const wchar_t *arg)
+    {
+        for (; *arg != L'\0'; arg++, pat++)
+        {
+            switch (*pat)
+            {
+                case L'?':
+                    break;
+
+                case L'*':
+                    // skip redundant *
+                    while (*pat == L'*')
+                    {
+                        pat++;
+                    }
+
+                    // * at end matches whatever follows
+                    if (*pat == L'\0')
+                    {
+                        return true;
+                    }
+
+                    while (*arg != L'\0')
+                    {
+                        if (arg_matches(pat, arg++))
+                        {
+                            return true;
+                        }
+                    }
+                    return false;
+
+                case L'[':
+                {
+                    bool negate = false;
+                    if (*++pat == L'^')
+                    {
+                        negate = true;
+                        pat++;
+                    }
+                    // When ignoring case, argch is lowercased and every class member is lowercased too
+                    bool match = false;
+                    wchar_t argch = opts.ignore_case ? towlower(*arg) : *arg;
+                    wchar_t patch, patch2;
+                    while ((patch = *pat++) != L']')
+                    {
+                        if (patch == L'\0')
+                        {
+                            return false; // no closing ]
+                        }
+                        if (*pat == L'-' && (patch2 = *(pat + 1)) != L'\0' && patch2 != L']')
+                        {
+                            if (opts.ignore_case ? towlower(patch) <= argch && argch <= towlower(patch2)
+                                                 : patch <= argch && argch <= patch2)
+                            {
+                                match = true;
+                            }
+                            pat += 2;
+                        }
+                        else if ((opts.ignore_case ? wchar_t(towlower(patch)) : patch) == argch)
+                        {
+                            match = true;
+                        }
+                    }
+                    if (match == negate)
+                    {
+                        return false;
+                    }
+                    pat--; // step back onto ']' so the loop increment moves just past it
+                    break;
+                }
+
+                case L'\\':
+                    if (*(pat + 1) != L'\0')
+                    {
+                        pat++;
+                    }
+                    // fall through
+
+                default:
+                    if (opts.ignore_case ? towlower(*arg) != towlower(*pat) : *arg != *pat)
+                    {
+                        return false;
+                    }
+                    break;
+            }
+        }
+        // arg is exhausted - it's a match only if pattern is as well
+        while (*pat == L'*')
+        {
+            pat++;
+        }
+        return *pat == L'\0';
+    }
+
+public:
+    wildcard_matcher_t(const wchar_t *argv0_, const match_options_t &opts_, const wchar_t *pattern_)
+        : string_matcher_t(argv0_, opts_),
+          pattern(pattern_)
+    { }
+
+    virtual ~wildcard_matcher_t() { }
+    // Test arg against the pattern and print it (or "1" with --index) on a match; always returns true
+    bool report_matches(const wchar_t *arg)
+    {
+        if (opts.all || nmatch == 0)
+        {
+            bool match = arg_matches(pattern, arg);
+            if (match)
+            {
+                nmatch++;
+            }
+            if (!opts.quiet)
+            {
+                if (match)
+                {
+                    if (opts.index)
+                    {
+                        stdout_buffer += L"1\n";
+                    }
+                    else
+                    {
+                        stdout_buffer += arg;
+                        stdout_buffer += L'\n';
+                    }
+                }
+            }
+        }
+        return true;
+    }
+};
+// string_matcher_t back end for `string match --regex`, using a compiled PCRE2 pattern.
+class pcre2_matcher_t: public string_matcher_t
+{
+    pcre2_code *regex;       // compiled pattern; stays 0 if compilation failed
+    pcre2_match_data *match; // match block created from the pattern; stays 0 if regex is 0
+    // Print the overall match and each capture group of one pcre2_match() result (see codes below)
+    int report_match(const wchar_t *arg, int pcre2_rc)
+    {
+        // Return values: -1 = error, 0 = no match, 1 = match
+        if (pcre2_rc == PCRE2_ERROR_NOMATCH)
+        {
+            return 0;
+        }
+        if (pcre2_rc < 0)
+        {
+            // see http://www.pcre.org/current/doc/html/pcre2api.html#SEC30
+            string_fatal_error(_(L"%ls: Regular expression match error %d"), argv0, pcre2_rc);
+            return -1;
+        }
+        if (pcre2_rc == 0)
+        {
+            // The output vector wasn't big enough. Should not happen.
+            string_fatal_error(_(L"%ls: Regular expression internal error"), argv0);
+            return -1;
+        }
+        PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match);
+        for (int j = 0; j < pcre2_rc; j++)
+        {
+            PCRE2_SIZE begin = ovector[2*j];
+            PCRE2_SIZE end = ovector[2*j + 1];
+            if (!opts.quiet)
+            {
+                if (begin != PCRE2_UNSET && end != PCRE2_UNSET)
+                {
+                    if (opts.index)
+                    {
+                        stdout_buffer += to_string(begin + 1);
+                    }
+                    else if (end > begin) // may have end < begin if \K is used
+                    {
+                        stdout_buffer += wcstring(&arg[begin], end - begin);
+                    }
+                    stdout_buffer += L'\n';
+                }
+            }
+        }
+        return 1;
+    }
+
+public:
+    pcre2_matcher_t(const wchar_t *argv0_, const match_options_t &opts_, const wchar_t *pattern)
+        : string_matcher_t(argv0_, opts_),
+          regex(0), match(0)
+    {
+        // Disable some sequences that can lead to security problems
+        uint32_t options = PCRE2_NEVER_UTF;
+#if PCRE2_CODE_UNIT_WIDTH < 32
+        options |= PCRE2_NEVER_BACKSLASH_C;
+#endif
+
+        int err_code = 0;
+        PCRE2_SIZE err_offset = 0;
+
+        regex = pcre2_compile(
+            PCRE2_SPTR(pattern),
+            PCRE2_ZERO_TERMINATED,
+            options | (opts.ignore_case ? PCRE2_CASELESS : 0),
+            &err_code,
+            &err_offset,
+            0);
+        if (regex == 0)
+        {
+            string_fatal_error(_(L"%ls: Regular expression compilation failed at offset %d"),
+                argv0, int(err_offset));
+            return;
+        }
+
+        match = pcre2_match_data_create_from_pattern(regex, 0);
+        if (match == 0)
+        {
+            DIE_MEM();
+        }
+    }
+
+    virtual ~pcre2_matcher_t()
+    {
+        if (match != 0)
+        {
+            pcre2_match_data_free(match);
+        }
+        if (regex != 0)
+        {
+            pcre2_code_free(regex);
+        }
+    }
+
+    bool report_matches(const wchar_t *arg)
+    {
+        // A return value of true means all is well (even if no matches were
+        // found), false indicates an unrecoverable error.
+        if (regex == 0)
+        {
+            // pcre2_compile() failed
+            return false;
+        }
+        if (!opts.all && nmatch > 0)
+        {
+            return true;
+        }
+        // See pcre2demo.c for an explanation of this logic
+        PCRE2_SIZE arglen = wcslen(arg);
+        int rc = report_match(arg, pcre2_match(regex, PCRE2_SPTR(arg), arglen, 0, 0, match, 0));
+        if (rc < 0)
+        {
+            // pcre2 match error
+            return false;
+        }
+        if (rc == 0)
+        {
+            // no match
+            return true;
+        }
+        nmatch++;
+        // Report any additional matches
+        PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match);
+        while (opts.all || nmatch == 0)
+        {
+            uint32_t options = 0;
+            PCRE2_SIZE offset = ovector[1]; // Start at end of previous match
+            if (ovector[0] == ovector[1])
+            {
+                // Previous match was empty: retry at the same offset, anchored and non-empty
+                if (ovector[0] == arglen)
+                {
+                    break;
+                }
+                options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
+            }
+            else if (offset <= pcre2_get_startchar(match))
+            {
+                // \K can end a match at or before its start; step past the start to make progress
+                offset = pcre2_get_startchar(match) + 1;
+                if (offset > arglen)
+                {
+                    break;
+                }
+            }
+            rc = report_match(arg, pcre2_match(regex, PCRE2_SPTR(arg), arglen, offset, options, match, 0));
+            if (rc < 0)
+            {
+                return false;
+            }
+            if (rc == 0)
+            {
+                if (options == 0)
+                {
+                    // All matches found
+                    break;
+                }
+                ovector[1] = offset + 1;
+                continue;
+            }
+            nmatch++;
+        }
+        return true;
+    }
+};
+
+// Implements the `match` subcommand. The first non-option argument is the
+// pattern; every following argument is tested with a wildcard_matcher_t, or
+// with a pcre2_matcher_t when -r/--regex is given.
+//
+// Returns 0 if the matcher counted at least one match, 1 if it counted none,
+// and BUILTIN_STRING_ERROR on a usage error or when matching fails.
+static int string_match(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"ainqr";
+    const struct woption long_options[] =
+    {
+        { L"all", no_argument, 0, 'a'},
+        { L"ignore-case", no_argument, 0, 'i'},
+        { L"index", no_argument, 0, 'n'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"regex", no_argument, 0, 'r'},
+        { 0, 0, 0, 0 }
+    };
+
+    match_options_t opts;
+    bool use_regex = false;
+    wgetopter_t getopter;
+    int opt;
+    while ((opt = getopter.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        switch (opt)
+        {
+            case 'a':
+                opts.all = true;
+                break;
+
+            case 'i':
+                opts.ignore_case = true;
+                break;
+
+            case 'n':
+                opts.index = true;
+                break;
+
+            case 'q':
+                opts.quiet = true;
+                break;
+
+            case 'r':
+                use_regex = true;
+                break;
+
+            case '?':
+                builtin_unknown_option(parser, argv[0], argv[getopter.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+
+            default:
+                // 0 and any other value need no action
+                break;
+        }
+    }
+
+    // The pattern is mandatory
+    int i = getopter.woptind;
+    const wchar_t *pattern = string_get_arg_argv(&i, argv);
+    if (pattern == 0)
+    {
+        string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    // Further command-line arguments are rejected unless stdin is a terminal
+    if (argc > i && !isatty(builtin_stdin))
+    {
+        string_fatal_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    string_matcher_t *matcher = use_regex
+        ? static_cast<string_matcher_t *>(new pcre2_matcher_t(argv[0], opts, pattern))
+        : static_cast<string_matcher_t *>(new wildcard_matcher_t(argv[0], opts, pattern));
+
+    // Stop fetching arguments as soon as one of them fails
+    bool failed = false;
+    const wchar_t *subject;
+    while (!failed && (subject = string_get_arg(&i, argv)) != 0)
+    {
+        failed = !matcher->report_matches(subject);
+    }
+
+    int status;
+    if (failed)
+    {
+        status = BUILTIN_STRING_ERROR;
+    }
+    else
+    {
+        status = (matcher->match_count() > 0) ? 0 : 1;
+    }
+    delete matcher;
+    return status;
+}
+
+// Flags controlling a replace operation; every flag starts out false.
+struct replace_options_t
+{
+    bool all;          // replace every occurrence instead of only the first
+    bool ignore_case;  // compare without regard to case
+    bool quiet;        // produce no output
+
+    replace_options_t()
+    {
+        all = false;
+        ignore_case = false;
+        quiet = false;
+    }
+};
+
+// Abstract base class of the replacement back ends. It stores a copy of the
+// options, the command name used in error messages, and a running count of
+// replacements that subclasses update.
+class string_replacer_t
+{
+protected:
+    replace_options_t opts;
+    const wchar_t *argv0;
+    int nreplace;
+
+public:
+    string_replacer_t(const wchar_t *argv0_, const replace_options_t &opts_)
+        : opts(opts_),
+          argv0(argv0_),
+          nreplace(0)
+    {
+    }
+
+    virtual ~string_replacer_t()
+    {
+    }
+
+    // Process one argument; implemented by each back end.
+    virtual bool replace_matches(const wchar_t *arg) = 0;
+
+    // Number of replacements counted so far.
+    int replace_count()
+    {
+        return nreplace;
+    }
+};
+
+// Replacer that treats the pattern as a literal string. The pattern and
+// replacement pointers are stored, not copied.
+class literal_replacer_t: public string_replacer_t
+{
+    const wchar_t *pattern;
+    const wchar_t *replacement;
+    int patlen;
+
+public:
+    literal_replacer_t(const wchar_t *argv0_, const replace_options_t &opts_, const wchar_t *pattern_,
+                        const wchar_t *replacement_)
+        : string_replacer_t(argv0_, opts_),
+          pattern(pattern_),
+          replacement(replacement_),
+          patlen(wcslen(pattern))
+    {
+    }
+
+    virtual ~literal_replacer_t()
+    {
+    }
+
+    // Copy `arg` with occurrences of the pattern substituted, then append the
+    // result and a newline to stdout_buffer unless opts.quiet is set.
+    // Without opts.all, substitution stops once nreplace is non-zero.
+    // An empty pattern leaves the argument untouched. Always returns true.
+    bool replace_matches(const wchar_t *arg)
+    {
+        wcstring result;
+        if (patlen != 0)
+        {
+            for (const wchar_t *p = arg; *p != L'\0'; )
+            {
+                bool hit = false;
+                if (opts.all || nreplace == 0)
+                {
+                    int diff = opts.ignore_case ? wcsncasecmp(p, pattern, patlen)
+                                                : wcsncmp(p, pattern, patlen);
+                    hit = (diff == 0);
+                }
+
+                if (hit)
+                {
+                    // Emit the replacement and skip over the matched text
+                    result.append(replacement);
+                    p += patlen;
+                    ++nreplace;
+                }
+                else
+                {
+                    result.push_back(*p);
+                    ++p;
+                }
+            }
+        }
+        else
+        {
+            result.assign(arg);
+        }
+
+        if (!opts.quiet)
+        {
+            stdout_buffer += result;
+            stdout_buffer += L'\n';
+        }
+        return true;
+    }
+};
+
+// Replacer that treats the pattern as a PCRE2 regular expression and
+// performs the substitution with pcre2_substitute(). Owns the compiled
+// pattern and its match data and frees both in the destructor.
+class regex_replacer_t: public string_replacer_t
+{
+    pcre2_code *regex;
+    pcre2_match_data *match;
+    const wchar_t *replacement;
+
+public:
+    // Compiles `pattern`. On failure an error is reported through
+    // string_fatal_error() and `regex` stays null, which makes every later
+    // replace_matches() call return false.
+    regex_replacer_t(const wchar_t *argv0_, const replace_options_t &opts_, const wchar_t *pattern,
+                      const wchar_t *replacement_)
+        : string_replacer_t(argv0_, opts_),
+          regex(0), match(0), replacement(replacement_)
+    {
+        // Disable some sequences that can lead to security problems
+        uint32_t options = PCRE2_NEVER_UTF;
+#if PCRE2_CODE_UNIT_WIDTH < 32
+        options |= PCRE2_NEVER_BACKSLASH_C;
+#endif
+
+        int err_code = 0;
+        PCRE2_SIZE err_offset = 0;
+
+        regex = pcre2_compile(
+            PCRE2_SPTR(pattern),
+            PCRE2_ZERO_TERMINATED,
+            options | (opts.ignore_case ? PCRE2_CASELESS : 0),
+            &err_code,
+            &err_offset,
+            0);
+        if (regex == 0)
+        {
+            string_fatal_error(_(L"%ls: Regular expression compilation failed at offset %d"),
+                argv0, int(err_offset));
+            return;
+        }
+
+        match = pcre2_match_data_create_from_pattern(regex, 0);
+        if (match == 0)
+        {
+            DIE_MEM();
+        }
+    }
+
+    virtual ~regex_replacer_t()
+    {
+        if (match != 0)
+        {
+            pcre2_match_data_free(match);
+        }
+        if (regex != 0)
+        {
+            pcre2_code_free(regex);
+        }
+    }
+
+    // Substitute matches of the regex in `arg` (all of them with opts.all,
+    // otherwise the first) and append the result plus a newline to
+    // stdout_buffer unless opts.quiet is set. Without opts.all, once a
+    // replacement has been counted later arguments are echoed unchanged.
+    //
+    // A return value of true means all is well (even if no replacements
+    // were performed), false indicates an unrecoverable error.
+    bool replace_matches(const wchar_t *arg)
+    {
+        if (regex == 0)
+        {
+            // pcre2_compile() failed
+            return false;
+        }
+
+        if (!opts.all && nreplace > 0)
+        {
+            if (!opts.quiet)
+            {
+                stdout_buffer += arg;
+                stdout_buffer += L'\n';
+            }
+            return true;
+        }
+
+        uint32_t options = opts.all ? PCRE2_SUBSTITUTE_GLOBAL : 0;
+        PCRE2_SIZE arglen = wcslen(arg);
+
+        // The buffer capacity is tracked separately from the length passed to
+        // pcre2_substitute(), which is an in/out parameter; this way the
+        // growth logic never depends on what the library leaves in it after
+        // a failed call.
+        PCRE2_SIZE bufsize = (arglen == 0) ? 16 : 2 * arglen;
+        wchar_t *output = (wchar_t *)malloc(sizeof(wchar_t) * bufsize);
+        if (output == 0)
+        {
+            DIE_MEM();
+        }
+        int pcre2_rc = 0;
+        for (;;)
+        {
+            PCRE2_SIZE outlen = bufsize;
+            pcre2_rc = pcre2_substitute(
+                            regex,
+                            PCRE2_SPTR(arg),
+                            arglen,
+                            0,  // start offset
+                            options,
+                            match,
+                            0,  // match context
+                            PCRE2_SPTR(replacement),
+                            PCRE2_ZERO_TERMINATED,
+                            (PCRE2_UCHAR *)output,
+                            &outlen);
+
+            if (pcre2_rc == PCRE2_ERROR_NOMEMORY)
+            {
+                if (bufsize < MAX_REPLACE_SIZE)
+                {
+                    // Double the buffer, but never beyond MAX_REPLACE_SIZE
+                    bufsize = std::min(2 * bufsize, MAX_REPLACE_SIZE);
+                    output = (wchar_t *)realloc(output, sizeof(wchar_t) * bufsize);
+                    if (output == 0)
+                    {
+                        DIE_MEM();
+                    }
+                    continue;
+                }
+                string_fatal_error(_(L"%ls: Replacement string too large"), argv0);
+                free(output);
+                return false;
+            }
+            break;
+        }
+
+        bool rc = true;
+        if (pcre2_rc == PCRE2_ERROR_BADREPLACEMENT)
+        {
+            string_fatal_error(_(L"%ls: Invalid use of $ in replacement string"), argv0);
+            rc = false;
+        }
+        else if (pcre2_rc < 0)
+        {
+            string_fatal_error(_(L"%ls: Regular expression match error %d"), argv0, pcre2_rc);
+            rc = false;
+        }
+        else
+        {
+            if (!opts.quiet)
+            {
+                stdout_buffer += output;
+                stdout_buffer += L'\n';
+            }
+            // A non-negative result is the number of substitutions made
+            nreplace += pcre2_rc;
+        }
+
+        free(output);
+        return rc;
+    }
+};
+
+// Implements the `replace` subcommand. The first two non-option arguments
+// are the pattern and the replacement; every following argument is processed
+// with a literal_replacer_t, or with a regex_replacer_t when -r/--regex is
+// given.
+//
+// Returns 0 if at least one replacement was counted, 1 if none was, and
+// BUILTIN_STRING_ERROR on a usage error or when a replacement fails.
+static int string_replace(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L"aiqr";
+    const struct woption long_options[] =
+    {
+        { L"all", no_argument, 0, 'a'},
+        { L"ignore-case", no_argument, 0, 'i'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"regex", no_argument, 0, 'r'},
+        { 0, 0, 0, 0 }
+    };
+
+    replace_options_t opts;
+    bool use_regex = false;
+    wgetopter_t getopter;
+    int opt;
+    while ((opt = getopter.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        switch (opt)
+        {
+            case 'a':
+                opts.all = true;
+                break;
+
+            case 'i':
+                opts.ignore_case = true;
+                break;
+
+            case 'q':
+                opts.quiet = true;
+                break;
+
+            case 'r':
+                use_regex = true;
+                break;
+
+            case '?':
+                builtin_unknown_option(parser, argv[0], argv[getopter.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+
+            default:
+                // 0 and any other value need no action
+                break;
+        }
+    }
+
+    // Both the pattern and the replacement are mandatory
+    int i = getopter.woptind;
+    const wchar_t *pattern = string_get_arg_argv(&i, argv);
+    if (pattern == 0)
+    {
+        string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+    const wchar_t *replacement = string_get_arg_argv(&i, argv);
+    if (replacement == 0)
+    {
+        string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    // Further command-line arguments are rejected unless stdin is a terminal
+    if (argc > i && !isatty(builtin_stdin))
+    {
+        string_fatal_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    string_replacer_t *replacer = use_regex
+        ? static_cast<string_replacer_t *>(new regex_replacer_t(argv[0], opts, pattern, replacement))
+        : static_cast<string_replacer_t *>(new literal_replacer_t(argv[0], opts, pattern, replacement));
+
+    // Stop fetching arguments as soon as one of them fails
+    bool failed = false;
+    const wchar_t *subject;
+    while (!failed && (subject = string_get_arg(&i, argv)) != 0)
+    {
+        failed = !replacer->replace_matches(subject);
+    }
+
+    int status;
+    if (failed)
+    {
+        status = BUILTIN_STRING_ERROR;
+    }
+    else
+    {
+        status = (replacer->replace_count() > 0) ? 0 : 1;
+    }
+    delete replacer;
+    return status;
+}
+
+// Implements the `split` subcommand. The first non-option argument is the
+// separator; every following argument is split on it, from the left by
+// default or from the right with -r/--right. An empty separator splits the
+// argument into individual characters. -m/--max limits the number of splits;
+// the counter is shared by all arguments and 0 means unlimited.
+//
+// Unless -q/--quiet is given, each piece is appended to stdout_buffer on its
+// own line, in argument order. Returns 0 if at least one split was made, 1 if
+// none was, and BUILTIN_STRING_ERROR on a usage error.
+static int string_split(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L":m:qr";
+    const struct woption long_options[] =
+    {
+        { L"max", required_argument, 0, 'm'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"right", no_argument, 0, 'r'},
+        { 0, 0, 0, 0 }
+    };
+
+    int max = 0;
+    bool quiet = false;
+    bool right = false;
+    wgetopter_t w;
+    for (;;)
+    {
+        int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+        if (c == -1)
+        {
+            break;
+        }
+        switch (c)
+        {
+            case 0:
+                break;
+
+            case 'm':
+                // NOTE(review): the value is not validated. A negative max
+                // disables splitting in the right-to-left and per-character
+                // branches but means "unlimited" in the left-to-right
+                // separator branch - confirm which is intended.
+                max = int(wcstol(w.woptarg, 0, 10));
+                break;
+
+            case 'q':
+                quiet = true;
+                break;
+
+            case 'r':
+                right = true;
+                break;
+
+            case ':':
+                string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+                return BUILTIN_STRING_ERROR;
+
+            case '?':
+                builtin_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    int i = w.woptind;
+    const wchar_t *sep;
+    if ((sep = string_get_arg_argv(&i, argv)) == 0)
+    {
+        string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    // Further command-line arguments are rejected unless stdin is a terminal
+    if (!isatty(builtin_stdin) && argc > i)
+    {
+        string_fatal_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    std::list<wcstring> splits;
+    int seplen = wcslen(sep);
+    int nsplit = 0;
+    const wchar_t *arg;
+    if (right)
+    {
+        while ((arg = string_get_arg(&i, argv)) != 0)
+        {
+            // Pieces are found right to left, so each one is inserted in
+            // front of the previous piece of the SAME argument. `head` starts
+            // at the end of the list so that this argument's pieces land
+            // after those of earlier arguments and output keeps argument
+            // order.
+            std::list<wcstring>::iterator head = splits.end();
+            long arglen = long(wcslen(arg));
+            if (seplen == 0)
+            {
+                // Split to individual characters; `rest` is the length of
+                // the prefix that has not been split off yet.
+                long rest = arglen;
+                while (rest > 1 && (max == 0 || nsplit < max))
+                {
+                    head = splits.insert(head, wcstring(arg + rest - 1, 1));
+                    rest--;
+                    nsplit++;
+                }
+                head = splits.insert(head, wcstring(arg, rest));
+            }
+            else
+            {
+                // `end` is one past the unsplit prefix, `cur` the candidate
+                // separator position. Indices are used so that no pointer
+                // before the start of `arg` is ever formed.
+                long end = arglen;
+                long cur = end - seplen;
+                while (cur >= 0 && (max == 0 || nsplit < max))
+                {
+                    if (wcsncmp(arg + cur, sep, seplen) == 0)
+                    {
+                        head = splits.insert(head, wcstring(arg + cur + seplen, end - cur - seplen));
+                        end = cur;
+                        // Skip a whole separator so matches never overlap
+                        cur -= seplen;
+                        nsplit++;
+                    }
+                    else
+                    {
+                        cur--;
+                    }
+                }
+                head = splits.insert(head, wcstring(arg, end));
+            }
+        }
+    }
+    else
+    {
+        while ((arg = string_get_arg(&i, argv)) != 0)
+        {
+            if (seplen == 0)
+            {
+                // Split to individual characters; the final piece takes
+                // whatever is left (the whole argument if it is empty).
+                size_t arglen = wcslen(arg);
+                size_t pos = 0;
+                while (pos + 1 < arglen && (max == 0 || nsplit < max))
+                {
+                    splits.push_back(wcstring(arg + pos, 1));
+                    pos++;
+                    nsplit++;
+                }
+                splits.push_back(arg + pos);
+            }
+            else
+            {
+                const wchar_t *cur = arg;
+                while (cur != 0)
+                {
+                    // Stop searching once the split limit has been reached
+                    const wchar_t *ptr = (max > 0 && nsplit >= max) ? 0 : wcsstr(cur, sep);
+                    if (ptr == 0)
+                    {
+                        splits.push_back(cur);
+                        cur = 0;
+                    }
+                    else
+                    {
+                        splits.push_back(wcstring(cur, ptr - cur));
+                        cur = ptr + seplen;
+                        nsplit++;
+                    }
+                }
+            }
+        }
+    }
+
+    if (!quiet)
+    {
+        for (std::list<wcstring>::const_iterator si = splits.begin(); si != splits.end(); ++si)
+        {
+            stdout_buffer += *si;
+            stdout_buffer += L'\n';
+        }
+    }
+
+    return (nsplit > 0) ? 0 : 1;
+}
+
+// Implements the `sub` subcommand: extracts a substring from every argument.
+// -s/--start is a 1-based start position; a negative value counts from the
+// end of the string, and 0 is rejected. -l/--length is the maximum number of
+// characters to keep and must not be negative; when absent the substring
+// runs to the end of the argument.
+//
+// Unless -q/--quiet is given, each substring is appended to stdout_buffer on
+// its own line. Returns 0 if at least one argument was processed, 1 if none
+// was, and BUILTIN_STRING_ERROR on a usage error.
+static int string_sub(parser_t &parser, int argc, wchar_t **argv)
+{
+    const wchar_t *short_options = L":l:qs:";
+    const struct woption long_options[] =
+    {
+        { L"length", required_argument, 0, 'l'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"start", required_argument, 0, 's'},
+        { 0, 0, 0, 0 }
+    };
+
+    int start = 0;
+    int length = -1;
+    bool quiet = false;
+    wgetopter_t getopter;
+    int opt;
+    while ((opt = getopter.wgetopt_long(argc, argv, short_options, long_options, 0)) != -1)
+    {
+        switch (opt)
+        {
+            case 'l':
+                length = int(wcstol(getopter.woptarg, 0, 10));
+                if (length < 0)
+                {
+                    string_fatal_error(_(L"%ls: Invalid length value\n"), argv[0]);
+                    return BUILTIN_STRING_ERROR;
+                }
+                break;
+
+            case 'q':
+                quiet = true;
+                break;
+
+            case 's':
+                start = int(wcstol(getopter.woptarg, 0, 10));
+                if (start == 0)
+                {
+                    string_fatal_error(_(L"%ls: Invalid start value\n"), argv[0]);
+                    return BUILTIN_STRING_ERROR;
+                }
+                break;
+
+            case ':':
+                string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+                return BUILTIN_STRING_ERROR;
+
+            case '?':
+                builtin_unknown_option(parser, argv[0], argv[getopter.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+
+            default:
+                // 0 and any other value need no action
+                break;
+        }
+    }
+
+    // Command-line arguments are rejected unless stdin is a terminal
+    int i = getopter.woptind;
+    if (argc > i && !isatty(builtin_stdin))
+    {
+        string_fatal_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    int nsub = 0;
+    for (const wchar_t *arg = string_get_arg(&i, argv); arg != 0; arg = string_get_arg(&i, argv))
+    {
+        const wcstring text(arg);
+        const wcstring::size_type len = text.length();
+
+        // Turn the 1-based (or end-relative) start into a 0-based offset
+        wcstring::size_type first = 0;
+        if (start > 0)
+        {
+            first = start - 1;
+        }
+        else if (start < 0)
+        {
+            const wcstring::size_type back = -start;
+            first = (back > len) ? 0 : len - back;
+        }
+        if (first > len)
+        {
+            first = len;
+        }
+
+        // npos stands for "up to the end of the string"
+        wcstring::size_type span = wcstring::npos;
+        if (length >= 0)
+        {
+            span = length;
+        }
+        if (first + span > len)
+        {
+            span = wcstring::npos;
+        }
+
+        if (!quiet)
+        {
+            stdout_buffer += text.substr(first, span);
+            stdout_buffer += L'\n';
+        }
+        nsub++;
+    }
+
+    return (nsub > 0) ? 0 : 1;
+}
+
+// Implements the `trim` subcommand: strips characters from the ends of every
+// argument. -c/--chars sets the characters to strip (default: space, form
+// feed, newline, carriage return, tab). -l/--left and -r/--right restrict
+// trimming to one end; with neither (or both) given, both ends are trimmed.
+//
+// Unless -q/--quiet is given, each trimmed argument is appended to
+// stdout_buffer on its own line. Returns 0 if at least one character was
+// removed, 1 if none was, and BUILTIN_STRING_ERROR on a usage error.
+static int string_trim(parser_t &parser, int argc, wchar_t **argv)
+{
+    // The leading ':' makes a missing option argument come back as ':' (as
+    // the other subcommands taking option arguments do), so that the
+    // `case ':'` below is reachable and -c without a value is reported as a
+    // missing argument rather than as an unknown option.
+    const wchar_t *short_options = L":c:lqr";
+    const struct woption long_options[] =
+    {
+        { L"chars", required_argument, 0, 'c'},
+        { L"left", no_argument, 0, 'l'},
+        { L"quiet", no_argument, 0, 'q'},
+        { L"right", no_argument, 0, 'r'},
+        { 0, 0, 0, 0 }
+    };
+
+    int leftright = 0;  // bit 0: trim left, bit 1: trim right, 0: trim both
+    bool quiet = false;
+    wcstring chars = L" \f\n\r\t";
+    wgetopter_t w;
+    for (;;)
+    {
+        int c = w.wgetopt_long(argc, argv, short_options, long_options, 0);
+
+        if (c == -1)
+        {
+            break;
+        }
+        switch (c)
+        {
+            case 0:
+                break;
+
+            case 'c':
+                chars = w.woptarg;
+                break;
+
+            case 'l':
+                leftright |= 1;
+                break;
+
+            case 'q':
+                quiet = true;
+                break;
+
+            case 'r':
+                leftright |= 2;
+                break;
+
+            case ':':
+                string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+                return BUILTIN_STRING_ERROR;
+
+            case '?':
+                builtin_unknown_option(parser, argv[0], argv[w.woptind - 1]);
+                return BUILTIN_STRING_ERROR;
+        }
+    }
+
+    // Command-line arguments are rejected unless stdin is a terminal
+    int i = w.woptind;
+    if (!isatty(builtin_stdin) && argc > i)
+    {
+        string_fatal_error(BUILTIN_ERR_TOO_MANY_ARGUMENTS, argv[0]);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    const bool trim_left = !leftright || (leftright & 1);
+    const bool trim_right = !leftright || (leftright & 2);
+
+    const wchar_t *arg;
+    int ntrim = 0;
+    while ((arg = string_get_arg(&i, argv)) != 0)
+    {
+        // [begin, end) is the part of the argument that is kept
+        const wchar_t *begin = arg;
+        const wchar_t *end = arg + wcslen(arg);
+        if (trim_left)
+        {
+            while (begin != end && chars.find(*begin) != wcstring::npos)
+            {
+                begin++;
+                ntrim++;
+            }
+        }
+        if (trim_right)
+        {
+            while (begin != end && chars.find(*(end - 1)) != wcstring::npos)
+            {
+                end--;
+                ntrim++;
+            }
+        }
+        if (!quiet)
+        {
+            stdout_buffer += wcstring(begin, end - begin);
+            stdout_buffer += L'\n';
+        }
+    }
+
+    return (ntrim > 0) ? 0 : 1;
+}
+
+// Dispatch table pairing each subcommand name with the function that
+// implements it. The all-zero entry at the end terminates the table.
+static const struct string_subcommand
+{
+    const wchar_t *name;                                    // subcommand word
+    int (*handler)(parser_t &, int argc, wchar_t **argv);   // its implementation
+}
+string_subcommands[] =
+{
+    { L"escape",  string_escape },
+    { L"join",    string_join },
+    { L"length",  string_length },
+    { L"match",   string_match },
+    { L"replace", string_replace },
+    { L"split",   string_split },
+    { L"sub",     string_sub },
+    { L"trim",    string_trim },
+    { 0, 0 }
+};
+
+/**
+   The string builtin, for manipulating strings.
+
+   argv[1] selects the subcommand, which is looked up in string_subcommands
+   and called with argv shifted by one, so that the subcommand name becomes
+   its argv[0]. A missing or unknown subcommand prints an error plus the help
+   text and returns BUILTIN_STRING_ERROR; -h or --help prints the help text
+   and returns BUILTIN_STRING_OK.
+*/
+/*static*/ int builtin_string(parser_t &parser, wchar_t **argv)
+{
+    int argc = builtin_count_args(argv);
+    if (argc <= 1)
+    {
+        string_fatal_error(BUILTIN_ERR_MISSING, argv[0]);
+        builtin_print_help(parser, L"string", stderr_buffer);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    const wchar_t *verb = argv[1];
+    if (!wcscmp(verb, L"-h") || !wcscmp(verb, L"--help"))
+    {
+        builtin_print_help(parser, L"string", stderr_buffer);
+        return BUILTIN_STRING_OK;
+    }
+
+    // Walk the table until the name matches or the terminator is reached
+    const string_subcommand *entry = string_subcommands;
+    for (; entry->name != 0; entry++)
+    {
+        if (wcscmp(entry->name, verb) == 0)
+        {
+            break;
+        }
+    }
+
+    // Only the terminating entry has a null handler
+    if (entry->handler == 0)
+    {
+        string_fatal_error(_(L"%ls: Unknown subcommand '%ls'"), argv[0], verb);
+        builtin_print_help(parser, L"string", stderr_buffer);
+        return BUILTIN_STRING_ERROR;
+    }
+
+    return entry->handler(parser, argc - 1, argv + 1);
+}
diff --git a/src/fish_tests.cpp b/src/fish_tests.cpp
index eb26460be..ec564b5a1 100644
--- a/src/fish_tests.cpp
+++ b/src/fish_tests.cpp
@@ -4015,6 +4015,287 @@ static void test_wcstring_tok(void)
     }
 }
 
+int builtin_string(parser_t &parser, wchar_t **argv);
+extern wcstring stdout_buffer;
+/** Runs builtin_string on argv and calls err() if its return code or the text in stdout_buffer is not as expected. */
+static void run_one_string_test(const wchar_t **argv, int expected_rc, const wchar_t *expected_out)
+{
+    parser_t parser(PARSER_TYPE_GENERAL, true);
+    stdout_buffer.clear();
+    const int rc = builtin_string(parser, const_cast<wchar_t**>(argv));
+    wcstring args;  // escaped, space-separated copy of argv, used only in the failure messages
+    for (int i = 0; argv[i] != 0; i++)
+    {
+        args += escape_string(argv[i], ESCAPE_ALL) + L' ';
+    }
+    if (!args.empty()) args.resize(args.size() - 1);  // drop the trailing space; no size_t underflow on empty argv
+    if (rc != expected_rc)
+    {
+        err(L"Test failed on line %lu: [%ls]: expected return code %d but got %d",
+                (unsigned long)__LINE__, args.c_str(), expected_rc, rc);  // __LINE__ is an int; %lu needs unsigned long
+    }
+    else if (stdout_buffer != expected_out)
+    {
+        err(L"Test failed on line %lu: [%ls]: expected [%ls] but got [%ls]",
+                (unsigned long)__LINE__, args.c_str(),  // NOTE: this is the line of this call, not of the failing table row
+                escape_string(expected_out, ESCAPE_ALL).c_str(),
+                escape_string(stdout_buffer, ESCAPE_ALL).c_str());
+    }
+}
+
+/** Table-driven checks of the string builtin: each row holds an argv, the expected return code and the expected stdout_buffer text. */
+static void test_string(void)
+{
+    static struct string_test
+    {
+        const wchar_t *argv[15];
+        int expected_rc;
+        const wchar_t *expected_out;
+    }
+    string_tests[] =
+    {
+        { {L"string", L"escape", 0},                                1, L"" },
+        { {L"string", L"escape", L"", 0},                           0, L"''\n" },
+        { {L"string", L"escape", L"-n", L"", 0},                    0, L"\n" },
+        { {L"string", L"escape", L"a", 0},                          0, L"a\n" },
+        { {L"string", L"escape", L"\x07", 0},                       0, L"\\cg\n" },
+        { {L"string", L"escape", L"\"x\"", 0},                      0, L"'\"x\"'\n" },
+        { {L"string", L"escape", L"hello world", 0},                0, L"'hello world'\n" },
+        { {L"string", L"escape", L"-n", L"hello world", 0},         0, L"hello\\ world\n" },
+        { {L"string", L"escape", L"hello", L"world", 0},            0, L"hello\nworld\n" },
+        { {L"string", L"escape", L"-n", L"~", 0},                   0, L"\\~\n" },
+
+        { {L"string", L"join", 0},                                  1, L"" },
+        { {L"string", L"join", L"", 0},                             1, L"" },
+        { {L"string", L"join", L"", L"", L"", L"", 0},              0, L"\n" },
+        { {L"string", L"join", L"", L"a", L"b", L"c", 0},           0, L"abc\n" },
+        { {L"string", L"join", L".", L"fishshell", L"com", 0},      0, L"fishshell.com\n" },
+        { {L"string", L"join", L"/", L"usr", 0},                    1, L"usr\n" },
+        { {L"string", L"join", L"/", L"usr", L"local", L"bin", 0},  0, L"usr/local/bin\n" },
+        { {L"string", L"join", L"...", L"3", L"2", L"1", 0},        0, L"3...2...1\n" },
+        { {L"string", L"join", L"-q", 0},                           1, L"" },
+        { {L"string", L"join", L"-q", L".", 0},                     1, L"" },
+        { {L"string", L"join", L"-q", L".", L".", 0},               1, L"" },
+
+        { {L"string", L"length", 0},                                1, L"" },
+        { {L"string", L"length", L"", 0},                           1, L"0\n" },
+        { {L"string", L"length", L"", L"", L"", 0},                 1, L"0\n0\n0\n" },
+        { {L"string", L"length", L"a", 0},                          0, L"1\n" },
+        { {L"string", L"length", L"\U0002008A", 0},                 0, L"1\n" },
+        { {L"string", L"length", L"um", L"dois", L"tr\u00eas", 0},  0, L"2\n4\n4\n" },
+        { {L"string", L"length", L"-q", 0},                         1, L"" },
+        { {L"string", L"length", L"-q", L"", 0},                    1, L"" },
+        { {L"string", L"length", L"-q", L"a", 0},                   0, L"" },
+
+        { {L"string", L"match", 0},                                         1, L"" },
+        { {L"string", L"match", L"", 0},                                    1, L"" },
+        { {L"string", L"match", L"", L"", 0},                               0, L"\n" },
+        { {L"string", L"match", L"?", L"a", 0},                             0, L"a\n" },
+        { {L"string", L"match", L"*", L"", 0},                              0, L"\n" },
+        { {L"string", L"match", L"**", L"", 0},                             0, L"\n" },
+        { {L"string", L"match", L"*", L"xyzzy", 0},                         0, L"xyzzy\n" },
+        { {L"string", L"match", L"**", L"plugh", 0},                        0, L"plugh\n" },
+        { {L"string", L"match", L"a*b", L"axxb", 0},                        0, L"axxb\n" },
+        { {L"string", L"match", L"a??b", L"axxb", 0},                       0, L"axxb\n" },
+        { {L"string", L"match", L"-i", L"a??B", L"axxb", 0},                0, L"axxb\n" },
+        { {L"string", L"match", L"a*", L"axxb", 0},                         0, L"axxb\n" },
+        { {L"string", L"match", L"*a", L"xxa", 0},                          0, L"xxa\n" },
+        { {L"string", L"match", L"*a*", L"axa", 0},                         0, L"axa\n" },
+        { {L"string", L"match", L"*a*", L"xax", 0},                         0, L"xax\n" },
+        { {L"string", L"match", L"*a*", L"bxa", 0},                         0, L"bxa\n" },
+        { {L"string", L"match", L"*a", L"a", 0},                            0, L"a\n" },
+        { {L"string", L"match", L"a*", L"a", 0},                            0, L"a\n" },
+        { {L"string", L"match", L"a*b*c", L"axxbyyc", 0},                   0, L"axxbyyc\n" },
+        { {L"string", L"match", L"a*b?c", L"axxbyc", 0},                    0, L"axxbyc\n" },
+        { {L"string", L"match", L"*?", L"a", 0},                            0, L"a\n" },
+        { {L"string", L"match", L"*?", L"ab", 0},                           0, L"ab\n" },
+        { {L"string", L"match", L"?*", L"a", 0},                            0, L"a\n" },
+        { {L"string", L"match", L"?*", L"ab", 0},                           0, L"ab\n" },
+        { {L"string", L"match", L"[A-F][^A-F]", L"FG", 0},                  0, L"FG\n" },
+        { {L"string", L"match", L"[A][B]", L"AB", 0},                       0, L"AB\n" },
+        { {L"string", L"match", L"0x[0-9a-fA-F][0-9a-fA-F]", L"0x6a", 0},   0, L"0x6a\n" },
+        { {L"string", L"match", L"0x[0-9a-fA-F][0-9a-fA-F]", L"0xA6", 0},   0, L"0xA6\n" },
+        { {L"string", L"match", L"-i", L"0x[0-9a-f][0-9A-F]", L"0xAb", 0},  0, L"0xAb\n" },
+        { {L"string", L"match", L"\\*", L"*", 0},                           0, L"*\n" },
+        { {L"string", L"match", L"a*\\", L"abc\\", 0},                      0, L"abc\\\n" },
+        { {L"string", L"match", L"a*\\?", L"abc?", 0},                      0, L"abc?\n" },
+
+        { {L"string", L"match", L"?", L"", 0},                              1, L"" },
+        { {L"string", L"match", L"?", L"ab", 0},                            1, L"" },
+        { {L"string", L"match", L"??", L"a", 0},                            1, L"" },
+        { {L"string", L"match", L"?a", L"a", 0},                            1, L"" },
+        { {L"string", L"match", L"a?", L"a", 0},                            1, L"" },
+        { {L"string", L"match", L"a??B", L"axxb", 0},                       1, L"" },
+        { {L"string", L"match", L"a*b", L"axxbc", 0},                       1, L"" },
+        { {L"string", L"match", L"*b", L"bbba", 0},                         1, L"" },
+        { {L"string", L"match", L"0x[0-9a-fA-F][0-9a-fA-F]", L"0xbad", 0},  1, L"" },
+
+        { {L"string", L"match", L"-a", L"*", L"ab", L"cde", 0},             0, L"ab\ncde\n" },
+        { {L"string", L"match", L"*", L"ab", L"cde", 0},                    0, L"ab\n" },
+        { {L"string", L"match", L"-n", L"*d*", L"cde", 0},                  0, L"1\n" },
+        { {L"string", L"match", L"-q", L"a*", L"b", L"c", 0},               1, L"" },
+        { {L"string", L"match", L"-q", L"a*", L"b", L"a", 0},               0, L"" },
+
+        { {L"string", L"match", L"-r", 0},                                  1, L"" },
+        { {L"string", L"match", L"-r", L"", 0},                             1, L"" },
+        { {L"string", L"match", L"-r", L"", L"", 0},                        0, L"\n" },
+        { {L"string", L"match", L"-r", L".", L"a", 0},                      0, L"a\n" },
+        { {L"string", L"match", L"-r", L".*", L"", 0},                      0, L"\n" },
+        { {L"string", L"match", L"-r", L"a*b", L"b", 0},                    0, L"b\n" },
+        { {L"string", L"match", L"-r", L"a*b", L"aab", 0},                  0, L"aab\n" },
+        { {L"string", L"match", L"-r", L"-i", L"a*b", L"Aab", 0},           0, L"Aab\n" },
+        { {L"string", L"match", L"-r", L"-a", L"a[bc]", L"abadac", 0},      0, L"ab\nac\n" },
+        { {L"string", L"match", L"-r", L"-a", L"a", L"x", L"a", L"x", L"a", 0}, 0, L"a\na\n" },
+        { {L"string", L"match", L"-r", L"a", L"x", L"a", L"x", L"a", 0},    0, L"a\n" },
+        { {L"string", L"match", L"-r", L"a[bc]", L"abadac", 0},             0, L"ab\n" },
+        { {L"string", L"match", L"-r", L"-q", L"a[bc]", L"abadac", 0},      0, L"" },
+        { {L"string", L"match", L"-r", L"-q", L"a[bc]", L"ad", 0},          1, L"" },
+        { {L"string", L"match", L"-r", L"(a+)b(c)", L"aabc", 0},            0, L"aabc\naa\nc\n" },
+        { {L"string", L"match", L"-r", L"-a", L"(a)b(c)", L"abcabc", 0},    0, L"abc\na\nc\nabc\na\nc\n" },
+        { {L"string", L"match", L"-r", L"(a)b(c)", L"abcabc", 0},           0, L"abc\na\nc\n" },
+        { {L"string", L"match", L"-r", L"(a|(z))(bc)", L"abc", 0},          0, L"abc\na\nbc\n" },
+        { {L"string", L"match", L"-r", L"-n", L"a", L"a", 0},               0, L"1\n" },
+        { {L"string", L"match", L"-r", L"-n", L"-a", L"a", L"bacadae", 0},  0, L"2\n4\n6\n" },
+        { {L"string", L"match", L"-r", L"-n", L"(a).*(b)", L"a---b", 0},    0, L"1\n1\n5\n" },
+        { {L"string", L"match", L"-r", L"-n", L"(a)(b)", L"ab", 0}, 0, L"1\n1\n2\n" },
+        { {L"string", L"match", L"-r", L"-n", L"(a)(b)", L"abab", 0}, 0, L"1\n1\n2\n" },
+        { {L"string", L"match", L"-r", L"-n", L"-a", L"(a)(b)", L"abab", 0}, 0, L"1\n1\n2\n3\n3\n4\n" },
+        { {L"string", L"match", L"-r", L"*", L"", 0},                       1, L"" },
+        { {L"string", L"match", L"-r", L"foo\\Kbar", L"foobar", 0},         0, L"bar\n" },
+        { {L"string", L"match", L"-r", L"(foo)\\Kbar", L"foobar", 0},       0, L"bar\nfoo\n" },
+        { {L"string", L"match", L"-r", L"(?=ab\\K)", L"ab", 0},             0, L"\n" },
+        { {L"string", L"match", L"-r", L"(?=ab\\K)..(?=cd\\K)", L"abcd", 0}, 0, L"\n" },
+
+        { {L"string", L"replace", 0},                                       1, L"" },
+        { {L"string", L"replace", L"", 0},                                  1, L"" },
+        { {L"string", L"replace", L"", L"", 0},                             1, L"" },
+        { {L"string", L"replace", L"", L"", L"", 0},                        1, L"\n" },
+        { {L"string", L"replace", L"", L"", L" ", 0},                       1, L" \n" },
+        { {L"string", L"replace", L"a", L"b", L"", 0},                      1, L"\n" },
+        { {L"string", L"replace", L"a", L"b", L"a", 0},                     0, L"b\n" },
+        { {L"string", L"replace", L"a", L"b", L"xax", 0},                   0, L"xbx\n" },
+        { {L"string", L"replace", L"bar", L"x", L"red barn", 0},            0, L"red xn\n" },
+        { {L"string", L"replace", L"x", L"bar", L"red xn", 0},              0, L"red barn\n" },
+        { {L"string", L"replace", L"--", L"x", L"-", L"xyz", 0},            0, L"-yz\n" },
+        { {L"string", L"replace", L"--", L"y", L"-", L"xyz", 0},            0, L"x-z\n" },
+        { {L"string", L"replace", L"--", L"z", L"-", L"xyz", 0},            0, L"xy-\n" },
+        { {L"string", L"replace", L"-i", L"z", L"X", L"_Z_", 0},            0, L"_X_\n" },
+        { {L"string", L"replace", L"-a", L"a", L"A", L"aaa", 0},            0, L"AAA\n" },
+        { {L"string", L"replace", L"-i", L"a", L"z", L"AAA", 0},            0, L"zAA\n" },
+        { {L"string", L"replace", L"-q", L"x", L">x<", L"x", 0},            0, L"" },
+        { {L"string", L"replace", L"-a", L"x", L"", L"xxx", 0},             0, L"\n" },
+        { {L"string", L"replace", L"-a", L"***", L"_", L"*****", 0},        0, L"_**\n" },
+        { {L"string", L"replace", L"-a", L"***", L"***", L"******", 0},     0, L"******\n" },
+
+        { {L"string", L"replace", L"-r", 0},                                1, L"" },
+        { {L"string", L"replace", L"-r", L"", 0},                           1, L"" },
+        { {L"string", L"replace", L"-r", L"", L"", 0},                      1, L"" },
+        { {L"string", L"replace", L"-r", L"", L"", L"", 0},                 0, L"\n" },  // pcre2 behavior
+        { {L"string", L"replace", L"-r", L"", L"", L" ", 0},                0, L" \n" }, // pcre2 behavior
+        { {L"string", L"replace", L"-r", L"a", L"b", L"", 0},               1, L"\n" },
+        { {L"string", L"replace", L"-r", L"a", L"b", L"a", 0},              0, L"b\n" },
+        { {L"string", L"replace", L"-r", L".", L"x", L"abc", 0},            0, L"xbc\n" },
+        { {L"string", L"replace", L"-r", L".", L"", L"abc", 0},             0, L"bc\n" },
+        { {L"string", L"replace", L"-r", L"(\\w)(\\w)", L"$2$1", L"ab", 0}, 0, L"ba\n" },
+        { {L"string", L"replace", L"-r", L"(\\w)", L"$1$1", L"ab", 0},      0, L"aab\n" },
+        { {L"string", L"replace", L"-r", L"-a", L".", L"x", L"abc", 0},     0, L"xxx\n" },
+        { {L"string", L"replace", L"-r", L"-a", L"(\\w)", L"$1$1", L"ab", 0}, 0, L"aabb\n" },
+        { {L"string", L"replace", L"-r", L"-a", L".", L"", L"abc", 0},      0, L"\n" },
+        { {L"string", L"replace", L"-r", L"a", L"x", L"bc", L"cd", L"de", 0}, 1, L"bc\ncd\nde\n" },
+        { {L"string", L"replace", L"-r", L"a", L"x", L"bc", L"ca", L"ab", 0}, 0, L"bc\ncx\nab\n" },
+        { {L"string", L"replace", L"-r", L"-a", L"a", L"x", L"bc", L"ca", L"ab", 0}, 0, L"bc\ncx\nxb\n" },
+        { {L"string", L"replace", L"-r", L"-i", L"A", L"b", L"xax", 0},     0, L"xbx\n" },
+        { {L"string", L"replace", L"-r", L"-i", L"[a-z]", L".", L"1A2B", 0}, 0, L"1.2B\n" },
+        { {L"string", L"replace", L"-r", L"A", L"b", L"xax", 0},            1, L"xax\n" },
+        { {L"string", L"replace", L"-r", L"a", L"$1", L"a", 0},             1, L"" },
+        { {L"string", L"replace", L"-r", L"(a)", L"$2", L"a", 0},           1, L"" },
+        { {L"string", L"replace", L"-r", L"*", L".", L"a", 0},              1, L"" },
+
+        { {L"string", L"split", 0},                                         1, L"" },
+        { {L"string", L"split", L":", 0},                                   1, L"" },
+        { {L"string", L"split", L".", L"www.ch.ic.ac.uk", 0},               0, L"www\nch\nic\nac\nuk\n" },
+        { {L"string", L"split", L"..", L"....", 0},                         0, L"\n\n\n" },
+        { {L"string", L"split", L"-m1", L"..", L"....", 0},                 0, L"\n..\n" },
+        { {L"string", L"split", L"-m0", L"/", L"/usr/local/bin/fish", 0},   0, L"\nusr\nlocal\nbin\nfish\n" },
+        { {L"string", L"split", L"-m2", L":", L"a:b", L"c:d", L"e:f", 0},   0, L"a\nb\nc\nd\ne:f\n" },
+        { {L"string", L"split", L"-m1", L"-r", L"/", L"/usr/local/bin/fish", 0}, 0, L"/usr/local/bin\nfish\n" },
+        { {L"string", L"split", L"-r", L".", L"www.ch.ic.ac.uk", 0},        0, L"www\nch\nic\nac\nuk\n" },
+        { {L"string", L"split", L"--", L"--", L"a--b---c----d", 0},         0, L"a\nb\n-c\n\nd\n" },
+        { {L"string", L"split", L"-r", L"..", L"....", 0},                  0, L"\n\n\n" },
+        { {L"string", L"split", L"-r", L"--", L"--", L"a--b---c----d", 0},  0, L"a\nb-\nc\n\nd\n" },
+        { {L"string", L"split", L"", L"", 0},                               1, L"\n" },
+        { {L"string", L"split", L"", L"a", 0},                              1, L"a\n" },
+        { {L"string", L"split", L"", L"ab", 0},                             0, L"a\nb\n" },
+        { {L"string", L"split", L"", L"abc", 0},                            0, L"a\nb\nc\n" },
+        { {L"string", L"split", L"-m1", L"", L"abc", 0},                    0, L"a\nbc\n" },
+        { {L"string", L"split", L"-r", L"", L"", 0},                        1, L"\n" },
+        { {L"string", L"split", L"-r", L"", L"a", 0},                       1, L"a\n" },
+        { {L"string", L"split", L"-r", L"", L"ab", 0},                      0, L"a\nb\n" },
+        { {L"string", L"split", L"-r", L"", L"abc", 0},                     0, L"a\nb\nc\n" },
+        { {L"string", L"split", L"-r", L"-m1", L"", L"abc", 0},             0, L"ab\nc\n" },
+        { {L"string", L"split", L"-q", 0},                                  1, L"" },
+        { {L"string", L"split", L"-q", L":", 0},                            1, L"" },
+        { {L"string", L"split", L"-q", L"x", L"axbxc", 0},                  0, L"" },
+
+        { {L"string", L"sub", 0},                                   1, L"" },
+        { {L"string", L"sub", L"abcde", 0},                         0, L"abcde\n"},
+        { {L"string", L"sub", L"-l0", L"abcde", 0},                 0, L"\n"},
+        { {L"string", L"sub", L"-l2", L"abcde", 0},                 0, L"ab\n"},
+        { {L"string", L"sub", L"-l5", L"abcde", 0},                 0, L"abcde\n"},
+        { {L"string", L"sub", L"-l6", L"abcde", 0},                 0, L"abcde\n"},
+        { {L"string", L"sub", L"-l-1", L"abcde", 0},                1, L""},
+        { {L"string", L"sub", L"-s0", L"abcde", 0},                 1, L""},
+        { {L"string", L"sub", L"-s1", L"abcde", 0},                 0, L"abcde\n"},
+        { {L"string", L"sub", L"-s5", L"abcde", 0},                 0, L"e\n"},
+        { {L"string", L"sub", L"-s6", L"abcde", 0},                 0, L"\n"},
+        { {L"string", L"sub", L"-s-1", L"abcde", 0},                0, L"e\n"},
+        { {L"string", L"sub", L"-s-5", L"abcde", 0},                0, L"abcde\n"},
+        { {L"string", L"sub", L"-s-6", L"abcde", 0},                0, L"abcde\n"},
+        { {L"string", L"sub", L"-s1", L"-l0", L"abcde", 0},         0, L"\n"},
+        { {L"string", L"sub", L"-s1", L"-l1", L"abcde", 0},         0, L"a\n"},
+        { {L"string", L"sub", L"-s2", L"-l2", L"abcde", 0},         0, L"bc\n"},
+        { {L"string", L"sub", L"-s-1", L"-l1", L"abcde", 0},        0, L"e\n"},
+        { {L"string", L"sub", L"-s-1", L"-l2", L"abcde", 0},        0, L"e\n"},
+        { {L"string", L"sub", L"-s-3", L"-l2", L"abcde", 0},        0, L"cd\n"},
+        { {L"string", L"sub", L"-s-3", L"-l4", L"abcde", 0},        0, L"cde\n"},
+        { {L"string", L"sub", L"-q", 0},                            1, L"" },
+        { {L"string", L"sub", L"-q", L"abcde", 0},                  0, L""},
+
+        { {L"string", L"trim", 0},                                  1, L""},
+        { {L"string", L"trim", L"", 0},                             1, L"\n"},
+        { {L"string", L"trim", L" ", 0},                            0, L"\n"},
+        { {L"string", L"trim", L"  \f\n\r\t", 0},                   0, L"\n"},
+        { {L"string", L"trim", L" a", 0},                           0, L"a\n"},
+        { {L"string", L"trim", L"a ", 0},                           0, L"a\n"},
+        { {L"string", L"trim", L" a ", 0},                          0, L"a\n"},
+        { {L"string", L"trim", L"-l", L" a", 0},                    0, L"a\n"},
+        { {L"string", L"trim", L"-l", L"a ", 0},                    1, L"a \n"},
+        { {L"string", L"trim", L"-l", L" a ", 0},                   0, L"a \n"},
+        { {L"string", L"trim", L"-r", L" a", 0},                    1, L" a\n"},
+        { {L"string", L"trim", L"-r", L"a ", 0},                    0, L"a\n"},
+        { {L"string", L"trim", L"-r", L" a ", 0},                   0, L" a\n"},
+        { {L"string", L"trim", L"-c", L".", L" a", 0},              1, L" a\n"},
+        { {L"string", L"trim", L"-c", L".", L"a ", 0},              1, L"a \n"},
+        { {L"string", L"trim", L"-c", L".", L" a ", 0},             1, L" a \n"},
+        { {L"string", L"trim", L"-c", L".", L".a", 0},              0, L"a\n"},
+        { {L"string", L"trim", L"-c", L".", L"a.", 0},              0, L"a\n"},
+        { {L"string", L"trim", L"-c", L".", L".a.", 0},             0, L"a\n"},
+        { {L"string", L"trim", L"-c", L"\\/", L"/a\\", 0},          0, L"a\n"},
+        { {L"string", L"trim", L"-c", L"\\/", L"a/", 0},            0, L"a\n"},
+        { {L"string", L"trim", L"-c", L"\\/", L"\\a/", 0},          0, L"a\n"},
+        { {L"string", L"trim", L"-c", L"", L".a.", 0},              1, L".a.\n"},
+
+        { {0}, 0, 0 }
+    };
+
+    struct string_test *t = string_tests;
+    while (t->argv[0] != 0)
+    {
+        run_one_string_test(t->argv, t->expected_rc, t->expected_out);
+        t++;
+    }
+}
+
 /**
    Main test
 */
@@ -4106,6 +4387,7 @@ int main(int argc, char **argv)
     if (should_test_function("history_races")) history_tests_t::test_history_races();
     if (should_test_function("history_formats")) history_tests_t::test_history_formats();
     //history_tests_t::test_history_speed();
+    if (should_test_function("string")) test_string();
 
     say(L"Encountered %d errors in low-level tests", err_count);
     if (s_test_run_count == 0)