diff --git a/builtin.cpp b/builtin.cpp index ebfecc920..5ab3ea430 100644 --- a/builtin.cpp +++ b/builtin.cpp @@ -3583,7 +3583,7 @@ static int builtin_history(parser_t &parser, wchar_t **argv) if (argc == 1) { wcstring full_history; - history->get_string_representation(full_history, wcstring(L"\n")); + history->get_string_representation(&full_history, wcstring(L"\n")); stdout_buffer.append(full_history); stdout_buffer.push_back('\n'); return STATUS_BUILTIN_OK; diff --git a/env.cpp b/env.cpp index 8f43d8882..88d61c857 100644 --- a/env.cpp +++ b/env.cpp @@ -953,7 +953,7 @@ env_var_t env_get_string(const wcstring &key, env_mode_flags_t mode) history = &history_t::history_with_name(L"fish"); } if (history) - history->get_string_representation(result, ARRAY_SEP_STR); + history->get_string_representation(&result, ARRAY_SEP_STR); return result; } else if (key == L"COLUMNS") diff --git a/fish_tests.cpp b/fish_tests.cpp index a415d08e7..3a6537976 100644 --- a/fish_tests.cpp +++ b/fish_tests.cpp @@ -2827,7 +2827,7 @@ static bool history_equals(history_t &hist, const wchar_t * const *strings) void history_tests_t::test_history_formats(void) { const wchar_t *name; - + // Test inferring and reading legacy and bash history formats name = L"history_sample_fish_1_x"; say(L"Testing %ls", name); @@ -2920,6 +2920,33 @@ void history_tests_t::test_history_formats(void) test_history.clear(); fclose(f); } + + name = L"history_sample_corrupt1"; + say(L"Testing %ls", name); + if (! install_sample_history(name)) + { + err(L"Couldn't open file tests/%ls", name); + } + else + { + /* We simply invoke get_string_representation. If we don't die, the test is a success. */ + history_t &test_history = history_t::history_with_name(name); + const wchar_t *expected[] = + { + L"no_newline_at_end_of_file", + + L"corrupt_prefix", + + L"this_command_is_ok", + + NULL + }; + if (! history_equals(test_history, expected)) + { + err(L"test_history_formats failed for %ls\n", name); + } + test_history.clear(); + } } void history_tests_t::test_history_speed(void) diff --git a/history.cpp b/history.cpp index 52c50a97e..ff65c4542 100644 --- a/history.cpp +++ b/history.cpp @@ -215,10 +215,10 @@ static std::map histories; static wcstring history_filename(const wcstring &name, const wcstring &suffix); /** Replaces newlines with a literal backslash followed by an n, and replaces backslashes with two backslashes. */ -static void escape_yaml(std::string &str); +static void escape_yaml(std::string *str); /** Undoes escape_yaml */ -static void unescape_yaml(std::string &str); +static void unescape_yaml(std::string *str); /* We can merge two items if they are the same command. We use the more recent timestamp, more recent identifier, and the longer list of required paths. */ bool history_item_t::merge(const history_item_t &item) @@ -271,7 +271,7 @@ bool history_item_t::matches_search(const wcstring &term, enum history_search_ty static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp, const path_list_t &required_paths, history_output_buffer_t *buffer) { std::string cmd = wcs2string(wcmd); - escape_yaml(cmd); + escape_yaml(&cmd); buffer->append("- cmd: ", cmd.c_str(), "\n"); char timestamp_str[96]; @@ -285,7 +285,7 @@ static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp, const for (path_list_t::const_iterator iter = required_paths.begin(); iter != required_paths.end(); ++iter) { std::string path = wcs2string(*iter); - escape_yaml(path); + escape_yaml(&path); buffer->append(" - ", path.c_str(), "\n"); } } @@ -366,7 +366,7 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length size_t result = (size_t)(-1); while (cursor < mmap_length) { - const char * const line_start = begin + cursor; + const char *line_start = begin + cursor; /* Advance the cursor to the next line */ const char *newline = (const char *)memchr(line_start, '\n', mmap_length - cursor); @@ -374,15 +374,14 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length break; /* Advance the cursor past this line. +1 is for the newline */ - size_t line_len = newline - line_start; - cursor += line_len + 1; + cursor = newline - begin + 1; /* Skip lines with a leading space, since these are in the interior of one of our items */ if (line_start[0] == ' ') continue; /* Skip very short lines to make one of the checks below easier */ - if (line_len < 3) + if (newline - line_start < 3) continue; /* Try to be a little YAML compatible. Skip lines with leading %, ---, or ... */ @@ -390,6 +389,23 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length ! memcmp(line_start, "---", 3) || ! memcmp(line_start, "...", 3)) continue; + + + /* Hackish: fish 1.x rewriting a fish 2.0 history file can produce lines with lots of leading "- cmd: - cmd: - cmd:". Trim all but one leading "- cmd:". */ + const char *double_cmd = "- cmd: - cmd: "; + const size_t double_cmd_len = strlen(double_cmd); + while (newline - line_start > double_cmd_len && ! memcmp(line_start, double_cmd, double_cmd_len)) + { + /* Skip over just one of the - cmd. In the end there will be just one left. */ + line_start += strlen("- cmd: "); + } + + /* Hackish: fish 1.x rewriting a fish 2.0 history file can produce commands like "when: 123456". Ignore those. */ + const char *cmd_when = "- cmd: when:"; + const size_t cmd_when_len = strlen(cmd_when); + if (newline - line_start >= cmd_when_len && ! memcmp(line_start, cmd_when, cmd_when_len)) + continue; + /* At this point, we know line_start is at the beginning of an item. But maybe we want to skip this item because of timestamps. A 0 cutoff means we don't care; if we do care, then try parsing out a timestamp. */ if (cutoff_timestamp != 0) @@ -400,15 +416,9 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length /* Walk over lines that we think are interior. These lines are not null terminated, but are guaranteed to contain a newline. */ bool has_timestamp = false; - time_t timestamp; + time_t timestamp = 0; const char *interior_line; - /* - * Ensure the loop is processed at least once. Otherwise, - * timestamp is unitialized. - */ - bool processed_once = false; - for (interior_line = next_line(line_start, end - line_start); interior_line != NULL && ! has_timestamp; interior_line = next_line(interior_line, end - interior_line)) @@ -423,12 +433,8 @@ static size_t offset_of_next_item_fish_2_0(const char *begin, size_t mmap_length /* Try parsing a timestamp from this line. If we succeed, the loop will break. */ has_timestamp = parse_timestamp(interior_line, ×tamp); - - processed_once = true; } - assert(processed_once); - /* Skip this item if the timestamp is past our cutoff. */ if (has_timestamp && timestamp > cutoff_timestamp) { @@ -661,7 +667,7 @@ void history_t::set_valid_file_paths(const wcstring_list_t &valid_file_paths, hi } } -void history_t::get_string_representation(wcstring &result, const wcstring &separator) +void history_t::get_string_representation(wcstring *result, const wcstring &separator) { scoped_lock locker(lock); @@ -677,8 +683,8 @@ void history_t::get_string_representation(wcstring &result, const wcstring &sepa continue; if (! first) - result.append(separator); - result.append(iter->str()); + result->append(separator); + result->append(iter->str()); first = false; } @@ -694,8 +700,8 @@ void history_t::get_string_representation(wcstring &result, const wcstring &sepa continue; if (! first) - result.append(separator); - result.append(item.str()); + result->append(separator); + result->append(item.str()); first = false; } } @@ -763,18 +769,18 @@ static size_t trim_leading_spaces(std::string &str) return i; } -static bool extract_prefix_and_unescape_yaml(std::string &key, std::string &value, const std::string &line) +static bool extract_prefix_and_unescape_yaml(std::string *key, std::string *value, const std::string &line) { size_t where = line.find(":"); if (where != std::string::npos) { - key.assign(line, 0, where); + key->assign(line, 0, where); // skip a space after the : if necessary size_t val_start = where + 1; if (val_start < line.size() && line.at(val_start) == ' ') val_start++; - value.assign(line, val_start, line.size() - val_start); + value->assign(line, val_start, line.size() - val_start); unescape_yaml(key); unescape_yaml(value); @@ -791,13 +797,15 @@ history_item_t history_t::decode_item_fish_2_0(const char *base, size_t len) size_t indent = 0, cursor = 0; std::string key, value, line; - + /* Read the "- cmd:" line */ size_t advance = read_line(base, cursor, len, line); trim_leading_spaces(line); - if (! extract_prefix_and_unescape_yaml(key, value, line) || key != "- cmd") + if (! extract_prefix_and_unescape_yaml(&key, &value, line) || key != "- cmd") + { goto done; - + } + cursor += advance; cmd = str2wcstring(value); @@ -815,7 +823,7 @@ history_item_t history_t::decode_item_fish_2_0(const char *base, size_t len) if (this_indent == 0 || indent != this_indent) break; - if (! extract_prefix_and_unescape_yaml(key, value, line)) + if (! extract_prefix_and_unescape_yaml(&key, &value, line)) break; /* We are definitely going to consume this line */ @@ -845,7 +853,7 @@ history_item_t history_t::decode_item_fish_2_0(const char *base, size_t len) /* Skip the leading dash-space and then store this path it */ line.erase(0, 2); - unescape_yaml(line); + unescape_yaml(&line); paths.push_back(str2wcstring(line)); } } @@ -1190,31 +1198,31 @@ bool history_search_t::match_already_made(const wcstring &match) const return false; } -static void replace_all(std::string &str, const char *needle, const char *replacement) +static void replace_all(std::string *str, const char *needle, const char *replacement) { size_t needle_len = strlen(needle), replacement_len = strlen(replacement); size_t offset = 0; - while ((offset = str.find(needle, offset)) != std::string::npos) + while ((offset = str->find(needle, offset)) != std::string::npos) { - str.replace(offset, needle_len, replacement); + str->replace(offset, needle_len, replacement); offset += replacement_len; } } -static void escape_yaml(std::string &str) +static void escape_yaml(std::string *str) { replace_all(str, "\\", "\\\\"); //replace one backslash with two replace_all(str, "\n", "\\n"); //replace newline with backslash + literal n } /* This function is called frequently, so it ought to be fast. */ -static void unescape_yaml(std::string &str) +static void unescape_yaml(std::string *str) { - size_t cursor = 0, size = str.size(); + size_t cursor = 0, size = str->size(); while (cursor < size) { // Operate on a const version of str, to avoid needless COWs that at() does. - const std::string &const_str = str; + const std::string &const_str = *str; // Look for a backslash size_t backslash = const_str.find('\\', cursor); @@ -1230,13 +1238,13 @@ static void unescape_yaml(std::string &str) if (escaped_char == '\\') { // Two backslashes in a row. Delete the second one. - str.erase(backslash + 1, 1); + str->erase(backslash + 1, 1); size--; } else if (escaped_char == 'n') { // Backslash + n. Replace with a newline. - str.replace(backslash, 2, "\n"); + str->replace(backslash, 2, "\n"); size--; } // The character at index backslash has now been made whole; start at the next character diff --git a/history.h b/history.h index 5b7292e83..921f09c54 100644 --- a/history.h +++ b/history.h @@ -237,7 +237,7 @@ public: void incorporate_external_changes(); /* Gets all the history into a string with ARRAY_SEP_STR. This is intended for the $history environment variable. This may be long! */ - void get_string_representation(wcstring &str, const wcstring &separator); + void get_string_representation(wcstring *result, const wcstring &separator); /** Sets the valid file paths for the history item with the given identifier */ void set_valid_file_paths(const wcstring_list_t &valid_file_paths, history_identifier_t ident); diff --git a/tests/history_sample_corrupt1 b/tests/history_sample_corrupt1 new file mode 100644 index 000000000..3dfe694fa --- /dev/null +++ b/tests/history_sample_corrupt1 @@ -0,0 +1,5 @@ +- cmd: this_command_is_ok +- cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: when: 1396531614 +- cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: - cmd: corrupt_prefix +- cmd: no_newline_at_end_of_file + when: 1403531898 \ No newline at end of file