Improve history robustness against corrupt files

Fixes #1581
This commit is contained in:
ridiculousfish 2014-07-29 14:41:21 -07:00
parent b6658c5497
commit 52ae5f885e
6 changed files with 85 additions and 45 deletions

View file

@ -3583,7 +3583,7 @@ static int builtin_history(parser_t &parser, wchar_t **argv)
if (argc == 1) if (argc == 1)
{ {
wcstring full_history; wcstring full_history;
history->get_string_representation(full_history, wcstring(L"\n")); history->get_string_representation(&full_history, wcstring(L"\n"));
stdout_buffer.append(full_history); stdout_buffer.append(full_history);
stdout_buffer.push_back('\n'); stdout_buffer.push_back('\n');
return STATUS_BUILTIN_OK; return STATUS_BUILTIN_OK;

View file

@ -953,7 +953,7 @@ env_var_t env_get_string(const wcstring &key, env_mode_flags_t mode)
history = &history_t::history_with_name(L"fish"); history = &history_t::history_with_name(L"fish");
} }
if (history) if (history)
history->get_string_representation(result, ARRAY_SEP_STR); history->get_string_representation(&result, ARRAY_SEP_STR);
return result; return result;
} }
else if (key == L"COLUMNS") else if (key == L"COLUMNS")

View file

@ -2827,7 +2827,7 @@ static bool history_equals(history_t &hist, const wchar_t * const *strings)
void history_tests_t::test_history_formats(void) void history_tests_t::test_history_formats(void)
{ {
const wchar_t *name; const wchar_t *name;
// Test inferring and reading legacy and bash history formats // Test inferring and reading legacy and bash history formats
name = L"history_sample_fish_1_x"; name = L"history_sample_fish_1_x";
say(L"Testing %ls", name); say(L"Testing %ls", name);
@ -2920,6 +2920,33 @@ void history_tests_t::test_history_formats(void)
test_history.clear(); test_history.clear();
fclose(f); fclose(f);
} }
name = L"history_sample_corrupt1";
say(L"Testing %ls", name);
if (! install_sample_history(name))
{
err(L"Couldn't open file tests/%ls", name);
}
else
{
/* We simply invoke get_string_representation. If we don't die, the test is a success. */
history_t &test_history = history_t::history_with_name(name);
const wchar_t *expected[] =
{
L"no_newline_at_end_of_file",
L"corrupt_prefix",
L"this_command_is_ok",
NULL
};
if (! history_equals(test_history, expected))
{
err(L"test_history_formats failed for %ls\n", name);
}
test_history.clear();
}
} }
void history_tests_t::test_history_speed(void) void history_tests_t::test_history_speed(void)

View file

@ -215,10 +215,10 @@ static std::map<wcstring, history_t *> histories;
static wcstring history_filename(const wcstring &name, const wcstring &suffix); static wcstring history_filename(const wcstring &name, const wcstring &suffix);
/** Replaces newlines with a literal backslash followed by an n, and replaces backslashes with two backslashes. */ /** Replaces newlines with a literal backslash followed by an n, and replaces backslashes with two backslashes. */
static void escape_yaml(std::string &str); static void escape_yaml(std::string *str);
/** Undoes escape_yaml */ /** Undoes escape_yaml */
static void unescape_yaml(std::string &str); static void unescape_yaml(std::string *str);
/* We can merge two items if they are the same command. We use the more recent timestamp, more recent identifier, and the longer list of required paths. */ /* We can merge two items if they are the same command. We use the more recent timestamp, more recent identifier, and the longer list of required paths. */
bool history_item_t::merge(const history_item_t &item) bool history_item_t::merge(const history_item_t &item)
@ -271,7 +271,7 @@ bool history_item_t::matches_search(const wcstring &term, enum history_search_ty
static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp, const path_list_t &required_paths, history_output_buffer_t *buffer) static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp, const path_list_t &required_paths, history_output_buffer_t *buffer)
{ {
std::string cmd = wcs2string(wcmd); std::string cmd = wcs2string(wcmd);
escape_yaml(cmd); escape_yaml(&cmd);
buffer->append("- cmd: ", cmd.c_str(), "\n"); buffer->append("- cmd: ", cmd.c_str(), "\n");
char timestamp_str[96]; char timestamp_str[96];
@ -285,7 +285,7 @@ static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp, const
for (path_list_t::const_iterator iter = required_paths.begin(); iter != required_paths.end(); ++iter) for (path_list_t::const_iterator iter = required_paths.begin(); iter != required_paths.end(); ++iter)
{ {
std::string path = wcs2string(*iter); std::string path = wcs2string(*iter);
escape_yaml(path); escape_yaml(&path);
buffer->append(" - ", path.c_str(), "\n"); buffer->append(" - ", path.c_str(), "\n");
} }
} }
@ -366,7 +366,7 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length
size_t result = (size_t)(-1); size_t result = (size_t)(-1);
while (cursor < mmap_length) while (cursor < mmap_length)
{ {
const char * const line_start = begin + cursor; const char *line_start = begin + cursor;
/* Advance the cursor to the next line */ /* Advance the cursor to the next line */
const char *newline = (const char *)memchr(line_start, '\n', mmap_length - cursor); const char *newline = (const char *)memchr(line_start, '\n', mmap_length - cursor);
@ -374,15 +374,14 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length
break; break;
/* Advance the cursor past this line. +1 is for the newline */ /* Advance the cursor past this line. +1 is for the newline */
size_t line_len = newline - line_start; cursor = newline - begin + 1;
cursor += line_len + 1;
/* Skip lines with a leading space, since these are in the interior of one of our items */ /* Skip lines with a leading space, since these are in the interior of one of our items */
if (line_start[0] == ' ') if (line_start[0] == ' ')
continue; continue;
/* Skip very short lines to make one of the checks below easier */ /* Skip very short lines to make one of the checks below easier */
if (line_len < 3) if (newline - line_start < 3)
continue; continue;
/* Try to be a little YAML compatible. Skip lines with leading %, ---, or ... */ /* Try to be a little YAML compatible. Skip lines with leading %, ---, or ... */
@ -390,6 +389,23 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length
! memcmp(line_start, "---", 3) || ! memcmp(line_start, "---", 3) ||
! memcmp(line_start, "...", 3)) ! memcmp(line_start, "...", 3))
continue; continue;
/* Hackish: fish 1.x rewriting a fish 2.0 history file can produce lines with lots of leading "- cmd: - cmd: - cmd:". Trim all but one leading "- cmd:". */
const char *double_cmd = "- cmd: - cmd: ";
const size_t double_cmd_len = strlen(double_cmd);
while (newline - line_start > double_cmd_len && ! memcmp(line_start, double_cmd, double_cmd_len))
{
/* Skip over just one of the - cmd. In the end there will be just one left. */
line_start += strlen("- cmd: ");
}
/* Hackish: fish 1.x rewriting a fish 2.0 history file can produce commands like "when: 123456". Ignore those. */
const char *cmd_when = "- cmd: when:";
const size_t cmd_when_len = strlen(cmd_when);
if (newline - line_start >= cmd_when_len && ! memcmp(line_start, cmd_when, cmd_when_len))
continue;
/* At this point, we know line_start is at the beginning of an item. But maybe we want to skip this item because of timestamps. A 0 cutoff means we don't care; if we do care, then try parsing out a timestamp. */ /* At this point, we know line_start is at the beginning of an item. But maybe we want to skip this item because of timestamps. A 0 cutoff means we don't care; if we do care, then try parsing out a timestamp. */
if (cutoff_timestamp != 0) if (cutoff_timestamp != 0)
@ -400,15 +416,9 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length
/* Walk over lines that we think are interior. These lines are not null terminated, but are guaranteed to contain a newline. */ /* Walk over lines that we think are interior. These lines are not null terminated, but are guaranteed to contain a newline. */
bool has_timestamp = false; bool has_timestamp = false;
time_t timestamp; time_t timestamp = 0;
const char *interior_line; const char *interior_line;
/*
* Ensure the loop is processed at least once. Otherwise,
* timestamp is unitialized.
*/
bool processed_once = false;
for (interior_line = next_line(line_start, end - line_start); for (interior_line = next_line(line_start, end - line_start);
interior_line != NULL && ! has_timestamp; interior_line != NULL && ! has_timestamp;
interior_line = next_line(interior_line, end - interior_line)) interior_line = next_line(interior_line, end - interior_line))
@ -423,12 +433,8 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length
/* Try parsing a timestamp from this line. If we succeed, the loop will break. */ /* Try parsing a timestamp from this line. If we succeed, the loop will break. */
has_timestamp = parse_timestamp(interior_line, &timestamp); has_timestamp = parse_timestamp(interior_line, &timestamp);
processed_once = true;
} }
assert(processed_once);
/* Skip this item if the timestamp is past our cutoff. */ /* Skip this item if the timestamp is past our cutoff. */
if (has_timestamp && timestamp > cutoff_timestamp) if (has_timestamp && timestamp > cutoff_timestamp)
{ {
@ -661,7 +667,7 @@ void history_t::set_valid_file_paths(const wcstring_list_t &valid_file_paths, hi
} }
} }
void history_t::get_string_representation(wcstring &result, const wcstring &separator) void history_t::get_string_representation(wcstring *result, const wcstring &separator)
{ {
scoped_lock locker(lock); scoped_lock locker(lock);
@ -677,8 +683,8 @@ void history_t::get_string_representation(wcstring &result, const wcstring &sepa
continue; continue;
if (! first) if (! first)
result.append(separator); result->append(separator);
result.append(iter->str()); result->append(iter->str());
first = false; first = false;
} }
@ -694,8 +700,8 @@ void history_t::get_string_representation(wcstring &result, const wcstring &sepa
continue; continue;
if (! first) if (! first)
result.append(separator); result->append(separator);
result.append(item.str()); result->append(item.str());
first = false; first = false;
} }
} }
@ -763,18 +769,18 @@ static size_t trim_leading_spaces(std::string &str)
return i; return i;
} }
static bool extract_prefix_and_unescape_yaml(std::string &key, std::string &value, const std::string &line) static bool extract_prefix_and_unescape_yaml(std::string *key, std::string *value, const std::string &line)
{ {
size_t where = line.find(":"); size_t where = line.find(":");
if (where != std::string::npos) if (where != std::string::npos)
{ {
key.assign(line, 0, where); key->assign(line, 0, where);
// skip a space after the : if necessary // skip a space after the : if necessary
size_t val_start = where + 1; size_t val_start = where + 1;
if (val_start < line.size() && line.at(val_start) == ' ') if (val_start < line.size() && line.at(val_start) == ' ')
val_start++; val_start++;
value.assign(line, val_start, line.size() - val_start); value->assign(line, val_start, line.size() - val_start);
unescape_yaml(key); unescape_yaml(key);
unescape_yaml(value); unescape_yaml(value);
@ -791,13 +797,15 @@ history_item_t history_t::decode_item_fish_2_0(const char *base, size_t len)
size_t indent = 0, cursor = 0; size_t indent = 0, cursor = 0;
std::string key, value, line; std::string key, value, line;
/* Read the "- cmd:" line */ /* Read the "- cmd:" line */
size_t advance = read_line(base, cursor, len, line); size_t advance = read_line(base, cursor, len, line);
trim_leading_spaces(line); trim_leading_spaces(line);
if (! extract_prefix_and_unescape_yaml(key, value, line) || key != "- cmd") if (! extract_prefix_and_unescape_yaml(&key, &value, line) || key != "- cmd")
{
goto done; goto done;
}
cursor += advance; cursor += advance;
cmd = str2wcstring(value); cmd = str2wcstring(value);
@ -815,7 +823,7 @@ history_item_t history_t::decode_item_fish_2_0(const char *base, size_t len)
if (this_indent == 0 || indent != this_indent) if (this_indent == 0 || indent != this_indent)
break; break;
if (! extract_prefix_and_unescape_yaml(key, value, line)) if (! extract_prefix_and_unescape_yaml(&key, &value, line))
break; break;
/* We are definitely going to consume this line */ /* We are definitely going to consume this line */
@ -845,7 +853,7 @@ history_item_t history_t::decode_item_fish_2_0(const char *base, size_t len)
/* Skip the leading dash-space and then store this path it */ /* Skip the leading dash-space and then store this path it */
line.erase(0, 2); line.erase(0, 2);
unescape_yaml(line); unescape_yaml(&line);
paths.push_back(str2wcstring(line)); paths.push_back(str2wcstring(line));
} }
} }
@ -1190,31 +1198,31 @@ bool history_search_t::match_already_made(const wcstring &match) const
return false; return false;
} }
static void replace_all(std::string &str, const char *needle, const char *replacement) static void replace_all(std::string *str, const char *needle, const char *replacement)
{ {
size_t needle_len = strlen(needle), replacement_len = strlen(replacement); size_t needle_len = strlen(needle), replacement_len = strlen(replacement);
size_t offset = 0; size_t offset = 0;
while ((offset = str.find(needle, offset)) != std::string::npos) while ((offset = str->find(needle, offset)) != std::string::npos)
{ {
str.replace(offset, needle_len, replacement); str->replace(offset, needle_len, replacement);
offset += replacement_len; offset += replacement_len;
} }
} }
static void escape_yaml(std::string &str) static void escape_yaml(std::string *str)
{ {
replace_all(str, "\\", "\\\\"); //replace one backslash with two replace_all(str, "\\", "\\\\"); //replace one backslash with two
replace_all(str, "\n", "\\n"); //replace newline with backslash + literal n replace_all(str, "\n", "\\n"); //replace newline with backslash + literal n
} }
/* This function is called frequently, so it ought to be fast. */ /* This function is called frequently, so it ought to be fast. */
static void unescape_yaml(std::string &str) static void unescape_yaml(std::string *str)
{ {
size_t cursor = 0, size = str.size(); size_t cursor = 0, size = str->size();
while (cursor < size) while (cursor < size)
{ {
// Operate on a const version of str, to avoid needless COWs that at() does. // Operate on a const version of str, to avoid needless COWs that at() does.
const std::string &const_str = str; const std::string &const_str = *str;
// Look for a backslash // Look for a backslash
size_t backslash = const_str.find('\\', cursor); size_t backslash = const_str.find('\\', cursor);
@ -1230,13 +1238,13 @@ static void unescape_yaml(std::string &str)
if (escaped_char == '\\') if (escaped_char == '\\')
{ {
// Two backslashes in a row. Delete the second one. // Two backslashes in a row. Delete the second one.
str.erase(backslash + 1, 1); str->erase(backslash + 1, 1);
size--; size--;
} }
else if (escaped_char == 'n') else if (escaped_char == 'n')
{ {
// Backslash + n. Replace with a newline. // Backslash + n. Replace with a newline.
str.replace(backslash, 2, "\n"); str->replace(backslash, 2, "\n");
size--; size--;
} }
// The character at index backslash has now been made whole; start at the next character // The character at index backslash has now been made whole; start at the next character

View file

@ -237,7 +237,7 @@ public:
void incorporate_external_changes(); void incorporate_external_changes();
/* Gets all the history into a string with ARRAY_SEP_STR. This is intended for the $history environment variable. This may be long! */ /* Gets all the history into a string with ARRAY_SEP_STR. This is intended for the $history environment variable. This may be long! */
void get_string_representation(wcstring &str, const wcstring &separator); void get_string_representation(wcstring *result, const wcstring &separator);
/** Sets the valid file paths for the history item with the given identifier */ /** Sets the valid file paths for the history item with the given identifier */
void set_valid_file_paths(const wcstring_list_t &valid_file_paths, history_identifier_t ident); void set_valid_file_paths(const wcstring_list_t &valid_file_paths, history_identifier_t ident);

View file

@ -0,0 +1,5 @@
- cmd: this_command_is_ok
- cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: when: 1396531614
- cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: corrupt_prefix
- cmd: no_newline_at_end_of_file
when: 1403531898