From 1977d3beb34bbe6c4ae73131266904f8b6f75886 Mon Sep 17 00:00:00 2001 From: axel Date: Sat, 28 Jan 2006 12:03:29 +1000 Subject: [PATCH] Add support for correctly handling illegal character strings darcs-hash:20060128020329-ac50b-d3499e6ff9108cef25bf7e00f39bbe1476896e07.gz --- common.c | 150 +++++++++++++++++++++++++++++++++++++-------------- common.h | 5 ++ exec.c | 11 +--- fish_pager.c | 20 ++++++- 4 files changed, 137 insertions(+), 49 deletions(-) diff --git a/common.c b/common.c index 093b0eed1..5226588c4 100644 --- a/common.c +++ b/common.c @@ -59,11 +59,6 @@ parts of fish. #include "wildcard.h" #include "parser.h" -/** - Error message to show on string convertion error -*/ -#define STR2WCS_MSG "fish: Invalid multibyte sequence \'" - /** The maximum number of minor errors to report. Further errors will be omitted. */ @@ -255,38 +250,51 @@ void sort_list( array_list_t *comp ) wchar_t *str2wcs( const char *in ) { - wchar_t *res; + wchar_t *out; + size_t res=0; + int in_pos=0; + int out_pos = 0; + size_t len = strlen(in); + mbstate_t state; - res = malloc( sizeof(wchar_t)*(strlen(in)+1) ); + out = malloc( sizeof(wchar_t)*(len+1) ); + memset( &state, 0, sizeof(state) ); - if( !res ) + if( !out ) { die_mem(); } - - if( (size_t)-1 == mbstowcs( res, in, sizeof(wchar_t)*(strlen(in)) +1) ) + + while( in[in_pos] ) { - error_count++; - if( error_count <=error_max ) + res = mbrtowc( &out[out_pos], &in[in_pos], len-in_pos, &state ); + + switch( res ) { - fflush( stderr ); - write( 2, - STR2WCS_MSG, - strlen(STR2WCS_MSG) ); - write( 2, - in, - strlen(in )); - write( 2, - "\'\n", - 2 ); + case (size_t)(-2): + case (size_t)(-1): + { + out[out_pos] = ENCODE_DIRECT_BASE + (unsigned char)in[in_pos]; + in_pos++; + memset( &state, 0, sizeof(state) ); + break; + } + + case 0: + { + return out; + } + default: + { + in_pos += res; + break; + } } - - free(res); - return 0; - } - - return res; + out_pos++; + } + out[out_pos] = 0; + return out; } void error_reset() @@ -296,20 +304,51 @@ void error_reset() char *wcs2str( const wchar_t *in ) { - char *res = malloc( MAX_UTF8_BYTES*wcslen(in)+1 ); + char *out; + size_t res=0; + int in_pos=0; + int out_pos = 0; + mbstate_t state; + + out = malloc( MAX_UTF8_BYTES*wcslen(in)+1 ); + memset( &state, 0, sizeof(state) ); - if( res == 0 ) + if( !out ) { die_mem(); } - wcstombs( res, - in, - MAX_UTF8_BYTES*wcslen(in)+1 ); - - res = realloc( res, strlen( res )+1 ); - - return res; + while( in[in_pos] ) + { + if( ( in[in_pos] >= ENCODE_DIRECT_BASE) && + ( in[in_pos] < ENCODE_DIRECT_BASE+256) ) + { + out[out_pos++] = in[in_pos]- ENCODE_DIRECT_BASE; + } + else + { + res = wcrtomb( &out[out_pos], in[in_pos], &state ); + + switch( res ) + { + case (size_t)(-1): + { + debug( 1, L"Wide character has no narrow representation" ); + memset( &state, 0, sizeof(state) ); + break; + } + default: + { + out_pos += res; + break; + } + } + } + in_pos++; + } + out[out_pos] = 0; + + return out; } char **wcsv2strv( const wchar_t **in ) @@ -752,6 +791,26 @@ wchar_t *escape( const wchar_t *in, while( *in != 0 ) { + + if( ( *in >= ENCODE_DIRECT_BASE) && + ( *in < ENCODE_DIRECT_BASE+256) ) + { + int val = *in - ENCODE_DIRECT_BASE; + int tmp; + + *(pos++) = L'\\'; + *(pos++) = L'X'; + + tmp = val/16; + *pos++ = tmp > 9? L'a'+(tmp-10):L'0'+tmp; + + tmp = val%16; + *pos++ = tmp > 9? L'a'+(tmp-10):L'0'+tmp; + + } + else + { + switch( *in ) { case L'\t': @@ -820,6 +879,8 @@ wchar_t *escape( const wchar_t *in, *pos++ = *in; break; } + } + in++; } *pos = 0; @@ -892,6 +953,7 @@ wchar_t *unescape( const wchar_t * orig, int unescape_special ) break; } + case L'X': case L'u': case L'U': case L'x': @@ -901,7 +963,9 @@ wchar_t *unescape( const wchar_t * orig, int unescape_special ) wchar_t res=0; int chars=2; int base=16; - + + int byte = 0; + switch( in[in_pos] ) { case L'u': @@ -925,6 +989,14 @@ wchar_t *unescape( const wchar_t * orig, int unescape_special ) break; } + case L'X': + { + byte=1; + base=16; + chars=2; + break; + } + case L'o': { base=8; @@ -947,7 +1019,7 @@ wchar_t *unescape( const wchar_t * orig, int unescape_special ) } - in[out_pos] = res; + in[out_pos] = (byte?ENCODE_DIRECT_BASE:0)+res; break; } diff --git a/common.h b/common.h index 57245c5b7..7fa8ed407 100644 --- a/common.h +++ b/common.h @@ -42,6 +42,11 @@ typedef char tputs_arg_t; */ #define FISH_COLOR_RESET -2 +/** + This is in the unicode private use area. +*/ +#define ENCODE_DIRECT_BASE 0xf000 + /** Save the shell mode on startup so we can restore them on exit */ diff --git a/exec.c b/exec.c index f10d630b7..ebae1a5ba 100644 --- a/exec.c +++ b/exec.c @@ -1312,15 +1312,8 @@ int exec_subshell( const wchar_t *cmd, { wchar_t *el; *end=0; - el = str2wcs( begin ); - if( !el ) - { - debug( 0, _( L"Subshell '%ls' returned illegal string, discarded one entry" ), cmd ); - } - else - { - al_push( l, el ); - } + el = str2wcs( begin ); + al_push( l, el ); begin = end+1; break; } diff --git a/fish_pager.c b/fish_pager.c index 75a23ac9e..018f869d9 100644 --- a/fish_pager.c +++ b/fish_pager.c @@ -412,6 +412,18 @@ static void printed_length( wchar_t *str, int has_description = 0; while( *str != 0 ) { + if( ( *str >= ENCODE_DIRECT_BASE) && + ( *str < ENCODE_DIRECT_BASE+256) ) + { + if( has_description ) + desc_len+=4; + else + comp_len+=4; + + } + else + { + switch( *str ) { case L'\n': @@ -460,6 +472,8 @@ static void printed_length( wchar_t *str, comp_len+= wcwidth(*str); break; } + } + str++; } if( has_description ) @@ -968,7 +982,11 @@ int main( int argc, char **argv ) for( i=3; i