Merge branch 'master' into ast

This commit is contained in:
ridiculousfish 2013-11-24 23:45:59 -08:00
commit 920adb8296

View file

@ -1633,562 +1633,6 @@ static bool unescape_string_internal(const wchar_t * const input, const size_t i
return ! errored;
}
/**
   Return an unescaped copy of the wide string orig.

   The input is duplicated with wcsdup() and the duplicate is rewritten in
   place: in_pos is the read cursor, out_pos is the write cursor, and a small
   state machine tracks whether the cursor is in unquoted text, inside single
   quotes or inside double quotes.

   flags is tested against two bits:
   - UNESCAPE_SPECIAL: unquoted ~ % * ? $ { } , and the quote characters are
     replaced by internal marker values (HOME_DIRECTORY, PROCESS_EXPAND,
     ANY_STRING, ANY_STRING_RECURSIVE, ANY_CHAR, VARIABLE_EXPAND,
     BRACKET_BEGIN, BRACKET_END, BRACKET_SEP, INTERNAL_SEPARATOR); a $ inside
     double quotes becomes VARIABLE_EXPAND_SINGLE.
   - UNESCAPE_INCOMPLETE: a backslash at the very end of the string, or a
     quote that is never closed, is tolerated instead of being an error.

   Returns the rewritten duplicate (allocated by wcsdup(), so presumably the
   caller releases it with free() - confirm against callers), or 0 when:
   - a backslash is the last character and UNESCAPE_INCOMPLETE is not set,
   - a numeric escape evaluates to more than the maximum for its prefix,
   - the character after \c is outside the accepted ranges,
   - the string ends inside quotes and UNESCAPE_INCOMPLETE is not set.
   On every such error path the duplicate is freed before returning.
*/
wchar_t *unescape(const wchar_t * orig, int flags)
{
/* Write cursor. Signed because branches that emit nothing decrement it, which can take it to -1 before the loop increment restores it. */
int out_pos;
/* Read cursor. */
size_t in_pos;
size_t len;
int c;
/* Net count of unquoted { minus } seen so far; only maintained when unescape_special is set. */
int bracket_count=0;
/* Character most recently stored at the write cursor (see the loop increment). */
wchar_t prev=0;
wchar_t *in;
bool unescape_special = !!(flags & UNESCAPE_SPECIAL);
bool allow_incomplete = !!(flags & UNESCAPE_INCOMPLETE);
/* CHECK is a project macro not visible here; presumably it returns 0 when orig is null - TODO confirm. */
CHECK(orig, 0);
len = wcslen(orig);
in = wcsdup(orig);
if (!in)
DIE_MEM();
/* Quoting state. mode_unquoted is the first enumerator (value 0), which the final "still inside quotes" test relies on. */
enum
{
mode_unquoted,
mode_single_quotes,
mode_double_quotes
} mode = mode_unquoted;
/*
   Each iteration consumes one or more input characters starting at in_pos
   and stores the result at out_pos. The increment expression first records
   in prev what was just stored (0 if out_pos is negative), then advances
   both cursors. A branch that wants to emit nothing does out_pos-- so the
   increment leaves the write cursor where it was.
*/
for (in_pos=0, out_pos=0;
in_pos<len;
(prev=(out_pos>=0)?in[out_pos]:0), out_pos++, in_pos++)
{
c = in[in_pos];
switch (mode)
{
/*
Mode 0 means unquoted string
*/
case mode_unquoted:
{
if (c == L'\\')
{
/* Advance to the character after the backslash and dispatch on it. */
switch (in[++in_pos])
{
/*
A null character after a backslash is an
error, return null
*/
case L'\0':
{
if (!allow_incomplete)
{
free(in);
return 0;
}
}
/*
NOTE(review): there is no break above, so when allow_incomplete is set
control falls through into the numeric-escape case below with
in[in_pos] == L'\0'. Confirm this is intentional.
*/
/*
Numeric escape sequences. No prefix means
octal escape, otherwise hexadecimal.
*/
case L'0':
case L'1':
case L'2':
case L'3':
case L'4':
case L'5':
case L'6':
case L'7':
case L'u':
case L'U':
case L'x':
case L'X':
{
int i;
long long res=0;
/* Defaults describe the \x form: up to 2 hex digits, value at most ASCII_MAX. */
int chars=2;
int base=16;
/* Set for \X only: the result is offset by ENCODE_DIRECT_BASE when stored. */
int byte = 0;
wchar_t max_val = ASCII_MAX;
switch (in[in_pos])
{
/* \u: up to 4 hex digits, at most UCS2_MAX */
case L'u':
{
chars=4;
max_val = UCS2_MAX;
break;
}
/* \U: up to 8 hex digits, at most WCHAR_MAX */
case L'U':
{
chars=8;
max_val = WCHAR_MAX;
break;
}
/* \x: keeps the defaults set above */
case L'x':
{
break;
}
/* \X: up to 2 hex digits, at most BYTE_MAX, stored with the ENCODE_DIRECT_BASE offset */
case L'X':
{
byte=1;
max_val = BYTE_MAX;
break;
}
/* No letter prefix: octal, up to 3 digits. Step back so the digit loop below re-reads the current character as the first digit. */
default:
{
base=8;
chars=3;
// note in_pos must be larger than 0 since we incremented it above
assert(in_pos > 0);
in_pos--;
break;
}
}
/* Accumulate up to chars digits. On the first character convert_digit rejects (negative result), un-read it and stop. */
for (i=0; i<chars; i++)
{
long d = convert_digit(in[++in_pos],base);
if (d < 0)
{
in_pos--;
break;
}
res=(res*base)+d;
}
/* Store the value if it is within range for this prefix; otherwise the whole unescape fails. */
if ((res <= max_val))
{
in[out_pos] = (wchar_t)((byte?ENCODE_DIRECT_BASE:0)+res);
}
else
{
free(in);
return 0;
}
break;
}
/*
\a means bell (alert)
*/
case L'a':
{
in[out_pos]=L'\a';
break;
}
/*
\b means backspace
*/
case L'b':
{
in[out_pos]=L'\b';
break;
}
/*
\cX means control sequence X
*/
case L'c':
{
/*
The character after \c is mapped so that a/A gives 1, b/B gives 2, and so on.
NOTE(review): the upper bound is the first letter plus 32, which extends
past the end of the alphabet - confirm this range is intended.
*/
in_pos++;
if (in[in_pos] >= L'a' &&
in[in_pos] <= (L'a'+32))
{
in[out_pos]=in[in_pos]-L'a'+1;
}
else if (in[in_pos] >= L'A' &&
in[in_pos] <= (L'A'+32))
{
in[out_pos]=in[in_pos]-L'A'+1;
}
else
{
free(in);
return 0;
}
break;
}
/*
\e means escape (0x1b)
*/
case L'e':
{
in[out_pos]=L'\x1b';
break;
}
/*
\f means form feed
*/
case L'f':
{
in[out_pos]=L'\f';
break;
}
/*
\n means newline
*/
case L'n':
{
in[out_pos]=L'\n';
break;
}
/*
\r means carriage return
*/
case L'r':
{
in[out_pos]=L'\r';
break;
}
/*
\t means tab
*/
case L't':
{
in[out_pos]=L'\t';
break;
}
/*
\v means vertical tab
*/
case L'v':
{
in[out_pos]=L'\v';
break;
}
/* If a backslash is followed by an actual newline, swallow them both */
case L'\n':
out_pos--;
break;
/* Any other escaped character is copied literally; with unescape_special an INTERNAL_SEPARATOR is stored in front of it. */
default:
{
if (unescape_special)
in[out_pos++] = INTERNAL_SEPARATOR;
in[out_pos]=in[in_pos];
break;
}
}
}
else
{
/* Unquoted, not a backslash: handle the characters that have a special meaning, copy everything else. */
switch (in[in_pos])
{
/* ~ becomes HOME_DIRECTORY only as the very first input character and only with unescape_special. */
case L'~':
{
if (unescape_special && (in_pos == 0))
{
in[out_pos]=HOME_DIRECTORY;
}
else
{
in[out_pos] = L'~';
}
break;
}
/* % becomes PROCESS_EXPAND only as the very first input character and only with unescape_special. */
case L'%':
{
if (unescape_special && (in_pos == 0))
{
in[out_pos]=PROCESS_EXPAND;
}
else
{
in[out_pos]=in[in_pos];
}
break;
}
/* * becomes ANY_STRING; if the previous output character is already ANY_STRING, the pair is collapsed into one ANY_STRING_RECURSIVE. */
case L'*':
{
if (unescape_special)
{
if (out_pos > 0 && in[out_pos-1]==ANY_STRING)
{
out_pos--;
in[out_pos] = ANY_STRING_RECURSIVE;
}
else
in[out_pos]=ANY_STRING;
}
else
{
in[out_pos]=in[in_pos];
}
break;
}
/* ? becomes ANY_CHAR with unescape_special. */
case L'?':
{
if (unescape_special)
{
in[out_pos]=ANY_CHAR;
}
else
{
in[out_pos]=in[in_pos];
}
break;
}
/* $ becomes VARIABLE_EXPAND with unescape_special. */
case L'$':
{
if (unescape_special)
{
in[out_pos]=VARIABLE_EXPAND;
}
else
{
in[out_pos]=in[in_pos];
}
break;
}
/* { becomes BRACKET_BEGIN and increments bracket_count with unescape_special. */
case L'{':
{
if (unescape_special)
{
bracket_count++;
in[out_pos]=BRACKET_BEGIN;
}
else
{
in[out_pos]=in[in_pos];
}
break;
}
/* } becomes BRACKET_END and decrements bracket_count with unescape_special. */
case L'}':
{
if (unescape_special)
{
bracket_count--;
in[out_pos]=BRACKET_END;
}
else
{
in[out_pos]=in[in_pos];
}
break;
}
/* , becomes BRACKET_SEP only while bracket_count is nonzero and the previously stored character is not already BRACKET_SEP. */
case L',':
{
if (unescape_special && bracket_count && prev!=BRACKET_SEP)
{
in[out_pos]=BRACKET_SEP;
}
else
{
in[out_pos]=in[in_pos];
}
break;
}
/* Opening single quote: switch mode. The quote itself is replaced by INTERNAL_SEPARATOR with unescape_special, otherwise dropped. */
case L'\'':
{
mode = mode_single_quotes;
if (unescape_special)
in[out_pos] = INTERNAL_SEPARATOR;
else
out_pos--;
break;
}
/* Opening double quote: switch mode. The quote itself is replaced by INTERNAL_SEPARATOR with unescape_special, otherwise dropped. */
case L'\"':
{
mode = mode_double_quotes;
if (unescape_special)
in[out_pos] = INTERNAL_SEPARATOR;
else
out_pos--;
break;
}
/* Ordinary character: copy through. */
default:
{
in[out_pos] = in[in_pos];
break;
}
}
}
break;
}
/*
Mode 1 means single quoted string, i.e 'foo'.
A backslash at the end of a line in a single quoted string does not swallow the backslash or newline.
*/
case mode_single_quotes:
{
if (c == L'\\')
{
switch (in[++in_pos])
{
/* Only backslash and single quote are unescaped inside single quotes. */
case '\\':
case L'\'':
{
in[out_pos]=in[in_pos];
break;
}
/* Backslash as the last character: an error unless allow_incomplete is set. */
case L'\0':
{
if (!allow_incomplete)
{
free(in);
return 0;
}
else
{
// A NUL should never need escaping, but a backslash is stored here in case that assumption is wrong.
in[out_pos] = L'\\';
}
}
break;
/* Any other character keeps its backslash: both are stored. */
default:
{
in[out_pos++] = L'\\';
in[out_pos]= in[in_pos];
}
}
}
/*
c still holds the character read at the top of the iteration.
NOTE(review): this is a plain if, not an else-if, so after the backslash
branch above (c == backslash) the else below stores in[in_pos] at out_pos
again. For the first and default cases that is the value already there;
for the L'\0' case it replaces the backslash just stored. Confirm intent.
*/
if (c == L'\'')
{
/* Closing single quote: replaced by INTERNAL_SEPARATOR with unescape_special, otherwise dropped; back to unquoted mode. */
if (unescape_special)
in[out_pos] = INTERNAL_SEPARATOR;
else
out_pos--;
mode = mode_unquoted;
}
else
{
in[out_pos] = in[in_pos];
}
break;
}
/*
Mode 2 means double quoted string, i.e. "foo"
*/
case mode_double_quotes:
{
switch (c)
{
/* Closing double quote: replaced by INTERNAL_SEPARATOR with unescape_special, otherwise dropped; back to unquoted mode. */
case '"':
{
mode = mode_unquoted;
if (unescape_special)
in[out_pos] = INTERNAL_SEPARATOR;
else
out_pos--;
break;
}
case '\\':
{
switch (in[++in_pos])
{
/* Backslash as the last character: an error unless allow_incomplete is set. */
case L'\0':
{
if (!allow_incomplete)
{
free(in);
return 0;
}
else
{
// Probably unnecessary, since a terminating NUL is always stored before this function returns.
in[out_pos]=in[in_pos];
}
}
break;
/* Backslash, dollar and double quote are the characters that can be escaped inside double quotes. */
case '\\':
case L'$':
case '"':
{
in[out_pos]=in[in_pos];
break;
}
/* Backslash followed by a newline: both are dropped. */
case '\n':
{
out_pos--;
break;
}
/* Any other character keeps its backslash: both are stored. */
default:
{
in[out_pos++] = L'\\';
in[out_pos] = in[in_pos];
break;
}
}
break;
}
/* $ inside double quotes becomes VARIABLE_EXPAND_SINGLE with unescape_special. */
case '$':
{
if (unescape_special)
{
in[out_pos]=VARIABLE_EXPAND_SINGLE;
}
else
{
in[out_pos]=in[in_pos];
}
break;
}
/* Ordinary character: copy through. */
default:
{
in[out_pos] = in[in_pos];
break;
}
}
break;
}
}
}
/* A nonzero mode means the input ended inside single or double quotes; that is an error unless allow_incomplete is set. */
if (!allow_incomplete && mode)
{
free(in);
return 0;
}
/* Terminate the rewritten string at the write cursor. */
in[out_pos]=L'\0';
return in;
}
bool unescape_string_in_place(wcstring *str, unescape_flags_t escape_special)
{
assert(str != NULL);