From c5031c2b3902d15302f1230652c8e67a84c64bde Mon Sep 17 00:00:00 2001 From: ridiculousfish Date: Wed, 12 Dec 2012 15:44:01 -0800 Subject: [PATCH] Output embedded null characters more often https://github.com/fish-shell/fish-shell/issues/444 --- common.cpp | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- common.h | 10 ---------- exec.cpp | 34 ++++++++++++++++++---------------- 3 files changed, 66 insertions(+), 30 deletions(-) diff --git a/common.cpp b/common.cpp index 761d70ef2..0fb8b774b 100644 --- a/common.cpp +++ b/common.cpp @@ -102,6 +102,7 @@ int debug_level=1; */ static struct winsize termsize; +static char *wcs2str_internal(const wchar_t *in, char *out); void show_stackframe() { @@ -292,15 +293,58 @@ char *wcs2str(const wchar_t *in) return wcs2str_internal(in, out); } +/* This function is distinguished from wcs2str_internal in that it allows embedded null bytes */ std::string wcs2string(const wcstring &input) { - char *tmp = wcs2str(input.c_str()); - std::string result = tmp; - free(tmp); + std::string result; + result.reserve(input.size()); + + mbstate_t state; + memset(&state, 0, sizeof(state)); + + size_t converted_max_len = (size_t)(1 + __mb_cur_max); + char *converted = new char[converted_max_len]; + + for (size_t i=0; i < input.size(); i++) + { + wchar_t wc = input[i]; + if (wc == INTERNAL_SEPARATOR) + { + } + else if ((wc >= ENCODE_DIRECT_BASE) && + (wc < ENCODE_DIRECT_BASE+256)) + { + result.push_back(wc - ENCODE_DIRECT_BASE); + } + else + { + bzero(converted, converted_max_len); + size_t len = wcrtomb(converted, wc, &state); + if (len == (size_t)(-1)) + { + debug(1, L"Wide character %d has no narrow representation", wc); + memset(&state, 0, sizeof(state)); + } + else + { + result.append(converted, len); + } + } + } + + delete [] converted; return result; } -char *wcs2str_internal(const wchar_t *in, char *out) +/** + Converts the wide character string \c in into it's narrow + equivalent, stored in \c out. \c out must have enough space to fit + the entire string. + + This function decodes illegal character sequences in a reversible + way using the private use area. +*/ +static char *wcs2str_internal(const wchar_t *in, char *out) { size_t res=0; size_t in_pos=0; diff --git a/common.h b/common.h index 6b53a5c12..184b3cf4e 100644 --- a/common.h +++ b/common.h @@ -273,16 +273,6 @@ void assert_is_background_thread(const char *who); void assert_is_locked(void *mutex, const char *who, const char *caller); #define ASSERT_IS_LOCKED(x) assert_is_locked((void *)(&x), #x, __FUNCTION__) -/** - Converts the wide character string \c in into it's narrow - equivalent, stored in \c out. \c out must have enough space to fit - the entire string. - - This function decodes illegal character sequences in a reversible - way using the private use area. -*/ -char *wcs2str_internal(const wchar_t *in, char *out); - /** Format the specified size (in bytes, kilobytes, etc.) into the specified stringbuffer. */ wcstring format_size(long long sz); diff --git a/exec.cpp b/exec.cpp index 3063b02e6..4a2d0419b 100644 --- a/exec.cpp +++ b/exec.cpp @@ -498,13 +498,12 @@ static void internal_exec_helper(parser_t &parser, } /** Perform output from builtins. Called from a forked child, so don't do anything that may allocate memory, etc.. */ -static void do_builtin_io(const char *out, const char *err) +static void do_builtin_io(const char *out, size_t outlen, const char *err, size_t errlen) { - size_t len; - if (out && (len = strlen(out))) + if (out && outlen) { - if (write_loop(STDOUT_FILENO, out, len) == -1) + if (write_loop(STDOUT_FILENO, out, outlen) == -1) { debug(0, L"Error while writing to stdout"); wperror(L"write_loop"); @@ -512,9 +511,9 @@ static void do_builtin_io(const char *out, const char *err) } } - if (err && (len = strlen(err))) + if (err && errlen) { - if (write_loop(STDERR_FILENO, err, len) == -1) + if (write_loop(STDERR_FILENO, err, errlen) == -1) { /* Can't really show any error message here, since stderr is @@ -1189,10 +1188,9 @@ void exec(parser_t &parser, job_t *j) printf("fork #-: Skipping fork for internal builtin for '%ls'\n", p->argv0()); } const wcstring &out = get_stdout_buffer(), &err = get_stderr_buffer(); - char *outbuff = wcs2str(out.c_str()), *errbuff = wcs2str(err.c_str()); - do_builtin_io(outbuff, errbuff); - free(outbuff); - free(errbuff); + const std::string outbuff = wcs2string(out); + const std::string errbuff = wcs2string(err); + do_builtin_io(outbuff.data(), outbuff.size(), errbuff.data(), errbuff.size()); skip_fork = 1; } @@ -1225,7 +1223,15 @@ void exec(parser_t &parser, job_t *j) /* Get the strings we'll write before we fork (since they call malloc) */ const wcstring &out = get_stdout_buffer(), &err = get_stderr_buffer(); - char *outbuff = wcs2str(out.c_str()), *errbuff = wcs2str(err.c_str()); + + /* These strings may contain embedded nulls, so don't treat them as C strings */ + const std::string outbuff_str = wcs2string(out); + const char *outbuff = outbuff_str.data(); + size_t outbuff_len = outbuff_str.size(); + + const std::string errbuff_str = wcs2string(err); + const char *errbuff = errbuff_str.data(); + size_t errbuff_len = errbuff_str.size(); fflush(stdout); fflush(stderr); @@ -1244,16 +1250,12 @@ void exec(parser_t &parser, job_t *j) */ p->pid = getpid(); setup_child_process(j, p); - do_builtin_io(outbuff, errbuff); + do_builtin_io(outbuff, outbuff_len, errbuff, errbuff_len); exit_without_destructors(p->status); } else { - /* Free the strings in the parent */ - free(outbuff); - free(errbuff); - /* This is the parent process. Store away information on the child, and possibly give