Migrate history file format stuff into new file history_file.cpp

Breaks up the history.cpp monolith.
2024-12-28 05:43:11 +00:00 · 2019-08-11 12:34:29 -07:00 · 2019-08-11 12:34:29 -07:00 · 91987a4548
commit 91987a4548
parent 74357bac91
4 changed files with 629 additions and 596 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -94,8 +94,8 @@ SET(FISH_SRCS
    src/color.cpp src/common.cpp src/complete.cpp src/env.cpp src/env_dispatch.cpp
    src/env_universal_common.cpp src/event.cpp src/exec.cpp src/expand.cpp
    src/fallback.cpp src/fish_version.cpp src/function.cpp src/highlight.cpp
-    src/history.cpp src/input.cpp src/input_common.cpp src/intern.cpp src/io.cpp
-    src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp
+    src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/intern.cpp
+    src/io.cpp src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp
    src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp
    src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp
    src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp
--- a/src/history.cpp
+++ b/src/history.cpp
@ -12,7 +12,6 @@
 #include <cstring>
 // We need the sys/file.h for the flock() declaration on Linux but not OS X.
 #include <sys/file.h>  // IWYU pragma: keep
-#include <sys/mman.h>
 #include <sys/stat.h>
 #include <time.h>
 #include <unistd.h>
@ -34,6 +33,7 @@
 #include "flog.h"
 #include "global_safety.h"
 #include "history.h"
+#include "history_file.h"
 #include "io.h"
 #include "iothread.h"
 #include "lru.h"
@ -159,22 +159,6 @@ static bool history_file_lock(int fd, int lock_type) {
    return retval != -1;
 }

-// History file types.
-enum history_file_type_t { history_type_fish_2_0, history_type_fish_1_x };
-
-/// Try to infer the history file type based on inspecting the data.
-static maybe_t<history_file_type_t> infer_file_type(const void *data, size_t len) {
-    maybe_t<history_file_type_t> result{};
-    if (len > 0) {  // old fish started with a #
-        if (static_cast<const char *>(data)[0] == '#') {
-            result = history_type_fish_1_x;
-        } else {  // assume new fish
-            result = history_type_fish_2_0;
-        }
-    }
-    return result;
-}
-
 /// Our LRU cache is used for restricting the amount of history we have, and limiting how long we
 /// order it.
 class history_lru_item_t {
@ -214,325 +198,8 @@ class history_lru_cache_t : public lru_cache_t<history_lru_cache_t, history_lru_

 }  // anonymous namespace

-// history_file_contents_t holds the read-only contents of a file.
-class history_file_contents_t {
-    // The memory mapped pointer.
-    void *start_;
-
-    // The mapped length.
-    size_t length_;
-
-    // The type of the mapped file.
-    history_file_type_t type_;
-
-    // Private constructor; use the static create() function.
-    history_file_contents_t(void *mmap_start, size_t mmap_length, history_file_type_t type)
-        : start_(mmap_start), length_(mmap_length), type_(type) {
-        assert(mmap_start != MAP_FAILED && "Invalid mmap address");
-    }
-
-    history_file_contents_t(history_file_contents_t &&) = delete;
-    void operator=(history_file_contents_t &&) = delete;
-
-    // Check if we should mmap the fd.
-    // Don't try mmap() on non-local filesystems.
-    static bool should_mmap(int fd) {
-        if (history_t::never_mmap) return false;
-
-        // mmap only if we are known not-remote (return is 0).
-        int ret = fd_check_is_remote(fd);
-        return ret == 0;
-    }
-
-    // Read up to len bytes from fd into address, zeroing the rest.
-    // Return true on success, false on failure.
-    static bool read_from_fd(int fd, void *address, size_t len) {
-        size_t remaining = len;
-        char *ptr = static_cast<char *>(address);
-        while (remaining > 0) {
-            ssize_t amt = read(fd, ptr, remaining);
-            if (amt < 0) {
-                if (errno != EINTR) {
-                    return false;
-                }
-            } else if (amt == 0) {
-                break;
-            } else {
-                remaining -= amt;
-                ptr += amt;
-            }
-        }
-        std::memset(ptr, 0, remaining);
-        return true;
-    }
-
-   public:
-    // Access the address at a given offset.
-    const char *address_at(size_t offset) const {
-        assert(offset <= length_ && "Invalid offset");
-        auto base = static_cast<const char *>(start_);
-        return base + offset;
-    }
-
-    // Return a pointer to the beginning.
-    const char *begin() const { return address_at(0); }
-
-    // Return a pointer to one-past-the-end.
-    const char *end() const { return address_at(length_); }
-
-    // Get the size of the contents.
-    size_t length() const { return length_; }
-
-    // Get the file type.
-    history_file_type_t type() const { return type_; }
-
-    ~history_file_contents_t() { munmap(start_, length_); }
-
-    // Construct a history file contents from a file descriptor. The file descriptor is not closed.
-    static std::unique_ptr<history_file_contents_t> create(int fd) {
-        // Check that the file is seekable, and its size.
-        off_t len = lseek(fd, 0, SEEK_END);
-        if (len <= 0 || static_cast<unsigned long>(len) >= SIZE_MAX) return nullptr;
-        if (lseek(fd, 0, SEEK_SET) != 0) return nullptr;
-
-        // Read the file, possibly ussing mmap.
-        void *mmap_start = nullptr;
-        if (should_mmap(fd)) {
-            // We feel confident to map the file directly. Note this is still risky: if another
-            // process truncates the file we risk SIGBUS.
-            mmap_start = mmap(0, size_t(len), PROT_READ, MAP_PRIVATE, fd, 0);
-            if (mmap_start == MAP_FAILED) return nullptr;
-        } else {
-            // We don't want to map the file. mmap some private memory and then read into it. We use
-            // mmap instead of malloc so that the destructor can always munmap().
-            mmap_start =
-#ifdef MAP_ANON
-                mmap(0, size_t(len), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
-#else
-                mmap(0, size_t(len), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-#endif
-            if (mmap_start == MAP_FAILED) return nullptr;
-            if (!read_from_fd(fd, mmap_start, len)) return nullptr;
-        }
-
-        // Check the file type.
-        auto mtype = infer_file_type(mmap_start, len);
-        if (!mtype) return nullptr;
-
-        return std::unique_ptr<history_file_contents_t>(
-            new history_file_contents_t(mmap_start, len, *mtype));
-    }
-};
-
 static wcstring history_filename(const wcstring &name, const wcstring &suffix);

-/// Replaces newlines with a literal backslash followed by an n, and replaces backslashes with two
-/// backslashes.
-static void escape_yaml(std::string *str);
-
-/// Inverse of escape_yaml.
-static void unescape_yaml(std::string *str);
-
-/// Read one line, stripping off any newline, and updating cursor. Note that our input string is NOT
-/// null terminated; it's just a memory mapped file.
-static size_t read_line(const char *base, size_t cursor, size_t len, std::string &result) {
-    // Locate the newline.
-    assert(cursor <= len);
-    const char *start = base + cursor;
-    const char *a_newline = (char *)std::memchr(start, '\n', len - cursor);
-    if (a_newline != NULL) {  // we found a newline
-        result.assign(start, a_newline - start);
-        // Return the amount to advance the cursor; skip over the newline.
-        return a_newline - start + 1;
-    }
-
-    // We ran off the end.
-    result.clear();
-    return len - cursor;
-}
-
-/// Trims leading spaces in the given string, returning how many there were.
-static size_t trim_leading_spaces(std::string &str) {
-    size_t i = 0, max = str.size();
-    while (i < max && str[i] == ' ') i++;
-    str.erase(0, i);
-    return i;
-}
-
-static bool extract_prefix_and_unescape_yaml(std::string *key, std::string *value,
-                                             const std::string &line) {
-    size_t where = line.find(':');
-    if (where != std::string::npos) {
-        key->assign(line, 0, where);
-
-        // Skip a space after the : if necessary.
-        size_t val_start = where + 1;
-        if (val_start < line.size() && line.at(val_start) == ' ') val_start++;
-        value->assign(line, val_start, line.size() - val_start);
-
-        unescape_yaml(key);
-        unescape_yaml(value);
-    }
-    return where != std::string::npos;
-}
-
-/// Remove backslashes from all newlines. This makes a string from the history file better formated
-/// for on screen display.
-static wcstring history_unescape_newlines_fish_1_x(const wcstring &in_str) {
-    wcstring out;
-    for (const wchar_t *in = in_str.c_str(); *in; in++) {
-        if (*in == L'\\') {
-            if (*(in + 1) != L'\n') {
-                out.push_back(*in);
-            }
-        } else {
-            out.push_back(*in);
-        }
-    }
-    return out;
-}
-
-/// Decode an item via the fish 1.x format. Adapted from fish 1.x's item_get().
-static history_item_t decode_item_fish_1_x(const char *begin, size_t length) {
-    const char *end = begin + length;
-    const char *pos = begin;
-    wcstring out;
-    bool was_backslash = false;
-    bool first_char = true;
-    bool timestamp_mode = false;
-    time_t timestamp = 0;
-
-    while (1) {
-        wchar_t c;
-        size_t res;
-        mbstate_t state = {};
-
-        if (MB_CUR_MAX == 1) {  // single-byte locale
-            c = (unsigned char)*pos;
-            res = 1;
-        } else {
-            res = std::mbrtowc(&c, pos, end - pos, &state);
-        }
-
-        if (res == (size_t)-1) {
-            pos++;
-            continue;
-        } else if (res == (size_t)-2) {
-            break;
-        } else if (res == (size_t)0) {
-            pos++;
-            continue;
-        }
-        pos += res;
-
-        if (c == L'\n') {
-            if (timestamp_mode) {
-                const wchar_t *time_string = out.c_str();
-                while (*time_string && !iswdigit(*time_string)) time_string++;
-
-                if (*time_string) {
-                    time_t tm = (time_t)fish_wcstol(time_string);
-                    if (!errno && tm >= 0) {
-                        timestamp = tm;
-                    }
-                }
-
-                out.clear();
-                timestamp_mode = false;
-                continue;
-            }
-            if (!was_backslash) break;
-        }
-
-        if (first_char) {
-            first_char = false;
-            if (c == L'#') timestamp_mode = true;
-        }
-
-        out.push_back(c);
-        was_backslash = (c == L'\\') && !was_backslash;
-    }
-
-    out = history_unescape_newlines_fish_1_x(out);
-    return history_item_t(out, timestamp);
-}
-
-/// Decode an item via the fish 2.0 format.
-static history_item_t decode_item_fish_2_0(const char *base, size_t len) {
-    wcstring cmd;
-    time_t when = 0;
-    path_list_t paths;
-
-    size_t indent = 0, cursor = 0;
-    std::string key, value, line;
-
-    // Read the "- cmd:" line.
-    size_t advance = read_line(base, cursor, len, line);
-    trim_leading_spaces(line);
-    if (!extract_prefix_and_unescape_yaml(&key, &value, line) || key != "- cmd") {
-        goto done;  //!OCLINT(goto is the cleanest way to handle bad input)
-    }
-
-    cursor += advance;
-    cmd = str2wcstring(value);
-
-    // Read the remaining lines.
-    for (;;) {
-        size_t advance = read_line(base, cursor, len, line);
-
-        size_t this_indent = trim_leading_spaces(line);
-        if (indent == 0) indent = this_indent;
-
-        if (this_indent == 0 || indent != this_indent) break;
-
-        if (!extract_prefix_and_unescape_yaml(&key, &value, line)) break;
-
-        // We are definitely going to consume this line.
-        cursor += advance;
-
-        if (key == "when") {
-            // Parse an int from the timestamp. Should this fail, strtol returns 0; that's
-            // acceptable.
-            char *end = NULL;
-            long tmp = strtol(value.c_str(), &end, 0);
-            when = tmp;
-        } else if (key == "paths") {
-            // Read lines starting with " - " until we can't read any more.
-            for (;;) {
-                size_t advance = read_line(base, cursor, len, line);
-                if (trim_leading_spaces(line) <= indent) break;
-
-                if (std::strncmp(line.c_str(), "- ", 2)) break;
-
-                // We're going to consume this line.
-                cursor += advance;
-
-                // Skip the leading dash-space and then store this path it.
-                line.erase(0, 2);
-                unescape_yaml(&line);
-                paths.push_back(str2wcstring(line));
-            }
-        }
-    }
-
-done:
-    history_item_t result(cmd, when);
-    result.set_required_paths(paths);
-    return result;
-}
-
-static history_item_t decode_item(const history_file_contents_t &contents, size_t offset) {
-    const char *base = contents.address_at(offset);
-    size_t len = contents.length() - offset;
-    switch (contents.type()) {
-        case history_type_fish_2_0:
-            return decode_item_fish_2_0(base, len);
-        case history_type_fish_1_x:
-            return decode_item_fish_1_x(base, len);
-    }
-    return history_item_t(L"");
-}
-
 /// We can merge two items if they are the same command. We use the more recent timestamp, more
 /// recent identifier, and the longer list of required paths.
 bool history_item_t::merge(const history_item_t &item) {
@ -592,7 +259,7 @@ static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp,
                                  const path_list_t &required_paths,
                                  history_output_buffer_t *buffer) {
    std::string cmd = wcs2string(wcmd);
-    escape_yaml(&cmd);
+    escape_yaml_fish_2_0(&cmd);
    buffer->append("- cmd: ", cmd.c_str(), "\n");
    buffer->append("  when: ", std::to_string(timestamp).c_str(), "\n");

@ -601,206 +268,12 @@ static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp,

        for (auto const &wpath : required_paths) {
            std::string path = wcs2string(wpath);
-            escape_yaml(&path);
+            escape_yaml_fish_2_0(&path);
            buffer->append("    - ", path.c_str(), "\n");
        }
    }
 }

-/// Parse a timestamp line that looks like this: spaces, "when:", spaces, timestamp, newline
-/// The string is NOT null terminated; however we do know it contains a newline, so stop when we
-/// reach it.
-static bool parse_timestamp(const char *str, time_t *out_when) {
-    const char *cursor = str;
-    // Advance past spaces.
-    while (*cursor == ' ') cursor++;
-
-    // Look for "when:".
-    size_t when_len = 5;
-    if (std::strncmp(cursor, "when:", when_len) != 0) return false;
-    cursor += when_len;
-
-    // Advance past spaces.
-    while (*cursor == ' ') cursor++;
-
-    // Try to parse a timestamp.
-    long timestamp = 0;
-    if (isdigit(*cursor) && (timestamp = strtol(cursor, NULL, 0)) > 0) {
-        *out_when = (time_t)timestamp;
-        return true;
-    }
-    return false;
-}
-
-/// Returns a pointer to the start of the next line, or NULL. The next line must itself end with a
-/// newline. Note that the string is not null terminated.
-static const char *next_line(const char *start, size_t length) {
-    // Handle the hopeless case.
-    if (length < 1) return NULL;
-
-    // Get a pointer to the end, that we must not pass.
-    const char *const end = start + length;
-
-    // Skip past the next newline.
-    const char *nextline = (const char *)std::memchr(start, '\n', length);
-    if (!nextline || nextline >= end) {
-        return NULL;
-    }
-    // Skip past the newline character itself.
-    if (++nextline >= end) {
-        return NULL;
-    }
-
-    // Make sure this new line is itself "newline terminated". If it's not, return NULL.
-    const char *next_newline = (const char *)std::memchr(nextline, '\n', end - nextline);
-    if (!next_newline) {
-        return NULL;
-    }
-
-    return nextline;
-}
-
-/// Support for iteratively locating the offsets of history items.
-/// Pass the file contents and a pointer to a cursor size_t, initially 0.
-/// If custoff_timestamp is nonzero, skip items created at or after that timestamp.
-/// Returns (size_t)-1 when done.
-static size_t offset_of_next_item_fish_2_0(const history_file_contents_t &contents,
-                                           size_t *inout_cursor, time_t cutoff_timestamp) {
-    size_t cursor = *inout_cursor;
-    size_t result = size_t(-1);
-    const size_t length = contents.length();
-    const char *const begin = contents.begin();
-    const char *const end = contents.end();
-    while (cursor < length) {
-        const char *line_start = contents.address_at(cursor);
-
-        // Advance the cursor to the next line.
-        const char *a_newline = (const char *)std::memchr(line_start, '\n', length - cursor);
-        if (a_newline == NULL) break;
-
-        // Advance the cursor past this line. +1 is for the newline.
-        cursor = a_newline - begin + 1;
-
-        // Skip lines with a leading space, since these are in the interior of one of our items.
-        if (line_start[0] == ' ') continue;
-
-        // Skip very short lines to make one of the checks below easier.
-        if (a_newline - line_start < 3) continue;
-
-        // Try to be a little YAML compatible. Skip lines with leading %, ---, or ...
-        if (!std::memcmp(line_start, "%", 1) || !std::memcmp(line_start, "---", 3) ||
-            !std::memcmp(line_start, "...", 3))
-            continue;
-
-        // Hackish: fish 1.x rewriting a fish 2.0 history file can produce lines with lots of
-        // leading "- cmd: - cmd: - cmd:". Trim all but one leading "- cmd:".
-        const char *double_cmd = "- cmd: - cmd: ";
-        const size_t double_cmd_len = std::strlen(double_cmd);
-        while ((size_t)(a_newline - line_start) > double_cmd_len &&
-               !std::memcmp(line_start, double_cmd, double_cmd_len)) {
-            // Skip over just one of the - cmd. In the end there will be just one left.
-            line_start += std::strlen("- cmd: ");
-        }
-
-        // Hackish: fish 1.x rewriting a fish 2.0 history file can produce commands like "when:
-        // 123456". Ignore those.
-        const char *cmd_when = "- cmd:    when:";
-        const size_t cmd_when_len = std::strlen(cmd_when);
-        if ((size_t)(a_newline - line_start) >= cmd_when_len &&
-            !std::memcmp(line_start, cmd_when, cmd_when_len)) {
-            continue;
-        }
-
-        // At this point, we know line_start is at the beginning of an item. But maybe we want to
-        // skip this item because of timestamps. A 0 cutoff means we don't care; if we do care, then
-        // try parsing out a timestamp.
-        if (cutoff_timestamp != 0) {
-            // Hackish fast way to skip items created after our timestamp. This is the mechanism by
-            // which we avoid "seeing" commands from other sessions that started after we started.
-            // We try hard to ensure that our items are sorted by their timestamps, so in theory we
-            // could just break, but I don't think that works well if (for example) the clock
-            // changes. So we'll read all subsequent items.
-            // Walk over lines that we think are interior. These lines are not null terminated, but
-            // are guaranteed to contain a newline.
-            bool has_timestamp = false;
-            time_t timestamp = 0;
-            const char *interior_line;
-
-            for (interior_line = next_line(line_start, end - line_start);
-                 interior_line != NULL && !has_timestamp;
-                 interior_line = next_line(interior_line, end - interior_line)) {
-                // If the first character is not a space, it's not an interior line, so we're done.
-                if (interior_line[0] != ' ') break;
-
-                // Hackish optimization: since we just stepped over some interior line, update the
-                // cursor so we don't have to look at these lines next time.
-                cursor = interior_line - begin;
-
-                // Try parsing a timestamp from this line. If we succeed, the loop will break.
-                has_timestamp = parse_timestamp(interior_line, &timestamp);
-            }
-
-            // Skip this item if the timestamp is past our cutoff.
-            if (has_timestamp && timestamp > cutoff_timestamp) {
-                continue;
-            }
-        }
-
-        // We made it through the gauntlet.
-        result = line_start - begin;
-        break;  //!OCLINT(avoid branching statement as last in loop)
-    }
-
-    *inout_cursor = cursor;
-    return result;
-}
-
-/// Same as offset_of_next_item_fish_2_0, but for fish 1.x (pre fishfish).
-/// Adapted from history_populate_from_mmap in history.c
-static size_t offset_of_next_item_fish_1_x(const char *begin, size_t mmap_length,
-                                           size_t *inout_cursor) {
-    if (mmap_length == 0 || *inout_cursor >= mmap_length) return (size_t)-1;
-
-    const char *end = begin + mmap_length;
-    const char *pos;
-    bool ignore_newline = false;
-    bool do_push = true;
-    bool all_done = false;
-    size_t result = *inout_cursor;
-
-    for (pos = begin + *inout_cursor; pos < end && !all_done; pos++) {
-        if (do_push) {
-            ignore_newline = (*pos == '#');
-            do_push = false;
-        }
-
-        if (*pos == '\\') {
-            pos++;
-        } else if (*pos == '\n') {
-            if (!ignore_newline) {
-                // pos will be left pointing just after this newline, because of the ++ in the loop.
-                all_done = true;
-            }
-            ignore_newline = false;
-        }
-    }
-
-    *inout_cursor = (pos - begin);
-    return result;
-}
-
-/// Returns the offset of the next item based on the given history type, or -1.
-static size_t offset_of_next_item(const history_file_contents_t &contents, size_t *inout_cursor,
-                                  time_t cutoff_timestamp) {
-    switch (contents.type()) {
-        case history_type_fish_2_0:
-            return offset_of_next_item_fish_2_0(contents, inout_cursor, cutoff_timestamp);
-        case history_type_fish_1_x:
-            return offset_of_next_item_fish_1_x(contents.begin(), contents.length(), inout_cursor);
-    }
-    return size_t(-1);
-}
-
 struct history_impl_t {
    // Privately add an item. If pending, the item will not be returned by history searches until a
    // call to resolve_pending.
@ -857,9 +330,6 @@ struct history_impl_t {
    // Loads old items if necessary.
    void load_old_if_needed();

-    // Reads the history file if necessary.
-    bool mmap_if_needed();
-
    // Deletes duplicates in new_items.
    void compact_new_items();

@ -1062,7 +532,7 @@ void history_impl_t::get_history(wcstring_list_t &result) {
    load_old_if_needed();
    for (auto iter = old_item_offsets.crbegin(); iter != old_item_offsets.crend(); ++iter) {
        size_t offset = *iter;
-        const history_item_t item = decode_item(*file_contents, offset);
+        const history_item_t item = file_contents->decode_item(offset);
        if (seen.insert(item.str()).second) result.push_back(item.str());
    }
 }
@ -1099,7 +569,7 @@ history_item_t history_impl_t::item_at_index(size_t idx) {
    if (idx < old_item_count) {
        // idx == 0 corresponds to last item in old_item_offsets.
        size_t offset = old_item_offsets.at(old_item_count - idx - 1);
-        return decode_item(*file_contents, offset);
+        return file_contents->decode_item(offset);
    }

    // Index past the valid range, so return an empty history item.
@ -1129,13 +599,9 @@ void history_impl_t::populate_from_file_contents() {
    old_item_offsets.clear();
    if (file_contents) {
        size_t cursor = 0;
-        for (;;) {
-            size_t offset = offset_of_next_item(*file_contents, &cursor, boundary_timestamp);
-            // If we get back -1, we're done.
-            if (offset == size_t(-1)) break;
-
+        while (auto offset = file_contents->offset_of_next_item(&cursor, boundary_timestamp)) {
            // Remember this item.
-            old_item_offsets.push_back(offset);
+            old_item_offsets.push_back(*offset);
        }
    }
 }
@ -1219,52 +685,6 @@ wcstring history_search_t::current_string() const {
    return item.str();
 }

-static void replace_all(std::string *str, const char *needle, const char *replacement) {
-    size_t needle_len = std::strlen(needle), replacement_len = std::strlen(replacement);
-    size_t offset = 0;
-    while ((offset = str->find(needle, offset)) != std::string::npos) {
-        str->replace(offset, needle_len, replacement);
-        offset += replacement_len;
-    }
-}
-
-static void escape_yaml(std::string *str) {
-    replace_all(str, "\\", "\\\\");  // replace one backslash with two
-    replace_all(str, "\n", "\\n");   // replace newline with backslash + literal n
-}
-
-/// This function is called frequently, so it ought to be fast.
-static void unescape_yaml(std::string *str) {
-    size_t cursor = 0, size = str->size();
-    while (cursor < size) {
-        // Operate on a const version of str, to avoid needless COWs that at() does.
-        const std::string &const_str = *str;
-
-        // Look for a backslash.
-        size_t backslash = const_str.find('\\', cursor);
-        if (backslash == std::string::npos || backslash + 1 >= size) {
-            // Either not found, or found as the last character.
-            break;
-        } else {
-            // Backslash found. Maybe we'll do something about it. Be sure to invoke the const
-            // version of at().
-            char escaped_char = const_str.at(backslash + 1);
-            if (escaped_char == '\\') {
-                // Two backslashes in a row. Delete the second one.
-                str->erase(backslash + 1, 1);
-                size--;
-            } else if (escaped_char == 'n') {
-                // Backslash + n. Replace with a newline.
-                str->replace(backslash, 2, "\n");
-                size--;
-            }
-            // The character at index backslash has now been made whole; start at the next
-            // character.
-            cursor = backslash + 1;
-        }
-    }
-}
-
 static wcstring history_filename(const wcstring &session_id, const wcstring &suffix) {
    if (session_id.empty()) return L"";

@ -1321,16 +741,11 @@ bool history_impl_t::rewrite_to_temporary_file(int existing_fd, int dst_fd) cons
    // old file contents).
    if (auto local_file = history_file_contents_t::create(existing_fd)) {
        size_t cursor = 0;
-        for (;;) {
-            size_t offset = offset_of_next_item(*local_file, &cursor, 0);
-            // If we get back -1, we're done.
-            if (offset == (size_t)-1) break;
-
+        while (auto offset = local_file->offset_of_next_item(&cursor, 0)) {
            // Try decoding an old item.
-            const history_item_t old_item = decode_item(*local_file, offset);
+            const history_item_t old_item = local_file->decode_item(*offset);

            if (old_item.empty() || deleted_items.count(old_item.str()) > 0) {
-                // FLOGF(error, L"Item is deleted : %s\n", old_item.str().c_str());
                continue;
            }
            // Add this old item.
--- a/src/history_file.cpp
+++ b/src/history_file.cpp
@ -0,0 +1,546 @@
+#include "config.h"
+
+#include "history.h"
+#include "history_file.h"
+
+#include <cstring>
+
+// Some forward declarations.
+static history_item_t decode_item_fish_2_0(const char *base, size_t len);
+static history_item_t decode_item_fish_1_x(const char *begin, size_t length);
+
+static size_t offset_of_next_item_fish_2_0(const history_file_contents_t &contents,
+                                           size_t *inout_cursor, time_t cutoff_timestamp);
+static size_t offset_of_next_item_fish_1_x(const char *begin, size_t mmap_length,
+                                           size_t *inout_cursor);
+
+/// \return whether it is acceptable to mmap() the file behind \p fd directly.
+/// Mapping is refused when history_t::never_mmap is set, and is otherwise allowed only when
+/// fd_check_is_remote() reports 0 for the descriptor (known not-remote).
+static bool should_mmap(int fd) {
+    // Short-circuiting keeps fd_check_is_remote() from being called when mmap is disabled.
+    return !history_t::never_mmap && fd_check_is_remote(fd) == 0;
+}
+
+/// Fill the \p len byte buffer at \p address with data read from \p fd.
+/// Interrupted reads (EINTR) are retried. If end-of-file arrives early, the unread tail of the
+/// buffer is zero-filled.
+/// \return false if read() fails with an error other than EINTR, true otherwise.
+static bool read_from_fd(int fd, void *address, size_t len) {
+    char *const buffer = static_cast<char *>(address);
+    size_t filled = 0;
+    while (filled < len) {
+        const ssize_t got = read(fd, buffer + filled, len - filled);
+        if (got == 0) break;  // end of file
+        if (got > 0) {
+            filled += got;
+            continue;
+        }
+        // got < 0: only an interrupted call is worth retrying.
+        if (errno != EINTR) return false;
+    }
+    std::memset(buffer + filled, 0, len - filled);
+    return true;
+}
+
+/// Guess the history file format from the first byte of \p data.
+/// \return an empty maybe_t when \p len is 0; history_type_fish_1_x when the data starts with
+/// '#' (old fish started with a #); history_type_fish_2_0 otherwise.
+static maybe_t<history_file_type_t> infer_file_type(const void *data, size_t len) {
+    maybe_t<history_file_type_t> inferred{};
+    if (len == 0) return inferred;
+
+    const char first_byte = *static_cast<const char *>(data);
+    if (first_byte == '#') {
+        inferred = history_type_fish_1_x;
+    } else {
+        inferred = history_type_fish_2_0;  // assume new fish
+    }
+    return inferred;
+}
+
+/// Replace every occurrence of \p needle in \p str with \p replacement, in place.
+/// Text inserted by a replacement is never rescanned, so a replacement that contains the needle
+/// is safe. An empty needle matches nothing and leaves the string untouched.
+static void replace_all(std::string *str, const char *needle, const char *replacement) {
+    const size_t needle_len = std::strlen(needle);
+    // An empty needle would match at every offset and the loop below would never terminate.
+    if (needle_len == 0) return;
+
+    const size_t replacement_len = std::strlen(replacement);
+    size_t offset = 0;
+    while ((offset = str->find(needle, offset)) != std::string::npos) {
+        str->replace(offset, needle_len, replacement);
+        // Resume after the inserted text.
+        offset += replacement_len;
+    }
+}
+
+/// Escape \p str in place for the fish 2.0 history format: each backslash becomes two
+/// backslashes and each newline becomes backslash + literal n.
+void escape_yaml_fish_2_0(std::string *str) {
+    // Order matters: backslashes are doubled first so that the backslash introduced for a
+    // newline is not doubled again.
+    static const char *const substitutions[][2] = {{"\\", "\\\\"}, {"\n", "\\n"}};
+    for (const auto &sub : substitutions) {
+        replace_all(str, sub[0], sub[1]);
+    }
+}
+
+/// Undo escape_yaml_fish_2_0() in place: backslash-backslash becomes one backslash and
+/// backslash-n becomes a newline. Any other backslash sequence, and a backslash that is the last
+/// character, is left untouched. This function is called frequently, so it ought to be fast.
+void unescape_yaml_fish_2_0(std::string *str) {
+    // Search and inspect through a const reference, to avoid needless COWs from non-const access.
+    const std::string &view = *str;
+
+    // Visit each backslash that is followed by at least one more character.
+    for (size_t pos = view.find('\\'); pos != std::string::npos && pos + 1 < view.size();
+         pos = view.find('\\', pos + 1)) {
+        switch (view[pos + 1]) {
+            case '\\':
+                // Two backslashes in a row. Delete the second one.
+                str->erase(pos + 1, 1);
+                break;
+            case 'n':
+                // Backslash + n. Replace with a newline.
+                str->replace(pos, 2, "\n");
+                break;
+            default:
+                // Unknown escape: keep both characters.
+                break;
+        }
+        // The character at index pos is now final; the next search starts just after it.
+    }
+}
+
+/// Unmap the region [start_, start_ + length_).
+history_file_contents_t::~history_file_contents_t() {
+    // munmap() takes a non-const pointer, hence the cast.
+    void *const region = const_cast<char *>(start_);
+    munmap(region, length_);
+}
+
+/// Wrap a mapping of \p mmap_length bytes starting at \p mmap_start, holding data of format
+/// \p type. The pointer must not be MAP_FAILED.
+history_file_contents_t::history_file_contents_t(const char *mmap_start, size_t mmap_length,
+                                                 history_file_type_t type)
+    : start_(mmap_start), length_(mmap_length), type_(type) {
+    assert(start_ != MAP_FAILED && "Invalid mmap address");
+}
+
+/// Load the entire file behind \p fd, either by mapping it directly or by reading it into
+/// anonymous mapped memory. The file descriptor is not closed.
+/// \return the contents, or nullptr if the file is empty, not seekable, too large, cannot be
+/// mapped or read, or its type cannot be inferred. No mapping is left behind on failure.
+std::unique_ptr<history_file_contents_t> history_file_contents_t::create(int fd) {
+    // Check that the file is seekable, and its size.
+    off_t len = lseek(fd, 0, SEEK_END);
+    if (len <= 0 || static_cast<unsigned long>(len) >= SIZE_MAX) return nullptr;
+    if (lseek(fd, 0, SEEK_SET) != 0) return nullptr;
+    const size_t map_len = static_cast<size_t>(len);
+
+    // Read the file, possibly using mmap.
+    void *mmap_start = nullptr;
+    if (should_mmap(fd)) {
+        // We feel confident to map the file directly. Note this is still risky: if another
+        // process truncates the file we risk SIGBUS.
+        mmap_start = mmap(nullptr, map_len, PROT_READ, MAP_PRIVATE, fd, 0);
+        if (mmap_start == MAP_FAILED) return nullptr;
+    } else {
+        // We don't want to map the file. mmap some private memory and then read into it. We use
+        // mmap instead of malloc so that the destructor can always munmap().
+        mmap_start =
+#ifdef MAP_ANON
+            mmap(nullptr, map_len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+#else
+            mmap(nullptr, map_len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+#endif
+        if (mmap_start == MAP_FAILED) return nullptr;
+        if (!read_from_fd(fd, mmap_start, map_len)) {
+            // Nothing owns the mapping yet, so release it here rather than leaking it.
+            munmap(mmap_start, map_len);
+            return nullptr;
+        }
+    }
+
+    // Check the file type.
+    auto mtype = infer_file_type(mmap_start, map_len);
+    if (!mtype) {
+        munmap(mmap_start, map_len);
+        return nullptr;
+    }
+
+    // From here on the returned object owns the mapping and unmaps it in its destructor.
+    return std::unique_ptr<history_file_contents_t>(
+        new history_file_contents_t(static_cast<const char *>(mmap_start), map_len, *mtype));
+}
+
+/// Decode the item starting at \p offset with the decoder matching this file's type. The decoder
+/// is given everything from \p offset to the end of the contents.
+/// \return the decoded item, or an item with empty text if the type is not a known one.
+history_item_t history_file_contents_t::decode_item(size_t offset) const {
+    const char *const item_start = address_at(offset);
+    const size_t bytes_left = this->length() - offset;
+    const history_file_type_t kind = this->type();
+
+    if (kind == history_type_fish_2_0) return decode_item_fish_2_0(item_start, bytes_left);
+    if (kind == history_type_fish_1_x) return decode_item_fish_1_x(item_start, bytes_left);
+    return history_item_t(L"");
+}
+
+/// Find the next item at or after \p *cursor using the scanner matching this file's type, and
+/// let that scanner update the cursor. \p cutoff is only forwarded to the fish 2.0 scanner.
+/// \return the item's offset, or none() when the scanner reports (size_t)-1.
+maybe_t<size_t> history_file_contents_t::offset_of_next_item(size_t *cursor, time_t cutoff) {
+    const size_t not_found = size_t(-1);
+    size_t found = not_found;
+
+    const history_file_type_t kind = this->type();
+    if (kind == history_type_fish_2_0) {
+        found = offset_of_next_item_fish_2_0(*this, cursor, cutoff);
+    } else if (kind == history_type_fish_1_x) {
+        found = offset_of_next_item_fish_1_x(this->begin(), this->length(), cursor);
+    }
+
+    if (found != not_found) {
+        return found;
+    }
+    return none();
+}
+
+/// Read the line starting at base + cursor into \p result, without its newline. The input is NOT
+/// null terminated (it's just a memory mapped file); \p len is its total size.
+/// \return how far to advance the cursor. If no newline remains, \p result is cleared and the
+/// returned amount covers the rest of the input.
+static size_t read_line(const char *base, size_t cursor, size_t len, std::string &result) {
+    assert(cursor <= len);
+    const char *const line = base + cursor;
+    const size_t available = len - cursor;
+
+    const void *const hit = std::memchr(line, '\n', available);
+    if (hit == nullptr) {
+        // We ran off the end.
+        result.clear();
+        return available;
+    }
+
+    const size_t line_len = static_cast<const char *>(hit) - line;
+    result.assign(line, line_len);
+    // Skip over the newline as well.
+    return line_len + 1;
+}
+
+/// Strip the run of ' ' characters at the front of \p str.
+/// \return the number of spaces removed.
+static size_t trim_leading_spaces(std::string &str) {
+    size_t count = str.find_first_not_of(' ');
+    // A string made up only of spaces (or an empty one) is trimmed to nothing.
+    if (count == std::string::npos) count = str.size();
+    str.erase(0, count);
+    return count;
+}
+
+/// Split \p line at its first ':' into \p key (text before the colon) and \p value (text after
+/// it, minus one optional leading space), then unescape both.
+/// \return false, leaving \p key and \p value untouched, if the line has no ':'.
+static bool extract_prefix_and_unescape_yaml(std::string *key, std::string *value,
+                                             const std::string &line) {
+    const size_t colon = line.find(':');
+    if (colon == std::string::npos) return false;
+
+    key->assign(line, 0, colon);
+
+    // Skip a space after the : if necessary.
+    size_t value_pos = colon + 1;
+    if (value_pos < line.size() && line[value_pos] == ' ') ++value_pos;
+    value->assign(line, value_pos, std::string::npos);
+
+    unescape_yaml_fish_2_0(key);
+    unescape_yaml_fish_2_0(value);
+    return true;
+}
+
+/// Decode an item via the fish 2.0 format.
+/// The first line must be "- cmd: <command>"; otherwise an item with empty text and timestamp 0
+/// is returned. Following lines that share one nonzero indent are read as "key: value" pairs:
+/// "when" supplies the timestamp and "paths" introduces a deeper-indented list of "- <path>"
+/// lines. Decoding stops at the first line that does not fit this shape.
+static history_item_t decode_item_fish_2_0(const char *base, size_t len) {
+    wcstring cmd;
+    time_t when = 0;
+    path_list_t paths;
+
+    std::string key, value, line;
+    size_t cursor = 0;
+
+    // Read the "- cmd:" line.
+    const size_t cmd_advance = read_line(base, cursor, len, line);
+    trim_leading_spaces(line);
+    const bool has_cmd = extract_prefix_and_unescape_yaml(&key, &value, line) && key == "- cmd";
+
+    if (has_cmd) {
+        cursor += cmd_advance;
+        cmd = str2wcstring(value);
+
+        // Read the remaining lines. The first one fixes the indent the others must match.
+        size_t indent = 0;
+        while (true) {
+            const size_t line_advance = read_line(base, cursor, len, line);
+
+            const size_t this_indent = trim_leading_spaces(line);
+            if (indent == 0) indent = this_indent;
+
+            if (this_indent == 0 || this_indent != indent) break;
+            if (!extract_prefix_and_unescape_yaml(&key, &value, line)) break;
+
+            // We are definitely going to consume this line.
+            cursor += line_advance;
+
+            if (key == "when") {
+                // Parse an int from the timestamp. Should this fail, strtol returns 0; that's
+                // acceptable.
+                when = static_cast<time_t>(strtol(value.c_str(), nullptr, 0));
+            } else if (key == "paths") {
+                // Read lines starting with " - " until we can't read any more.
+                while (true) {
+                    const size_t path_advance = read_line(base, cursor, len, line);
+                    if (trim_leading_spaces(line) <= indent) break;
+                    if (std::strncmp(line.c_str(), "- ", 2) != 0) break;
+
+                    // We're going to consume this line.
+                    cursor += path_advance;
+
+                    // Drop the leading dash-space and store the path.
+                    line.erase(0, 2);
+                    unescape_yaml_fish_2_0(&line);
+                    paths.push_back(str2wcstring(line));
+                }
+            }
+        }
+    }
+
+    history_item_t result(cmd, when);
+    result.set_required_paths(paths);
+    return result;
+}
+
+/// Parse a timestamp line that looks like this: spaces, "when:", spaces, timestamp, newline
+/// The string is NOT null terminated; however we do know it contains a newline, so every scan
+/// below stops at or before it.
+/// \param str start of the line.
+/// \param out_when receives the timestamp; written only on success.
+/// \return true if the line holds "when:" followed by a number greater than zero.
+static bool parse_timestamp(const char *str, time_t *out_when) {
+    const char *cursor = str;
+    // Advance past spaces.
+    while (*cursor == ' ') cursor++;
+
+    // Look for "when:".
+    const size_t when_len = 5;
+    if (std::strncmp(cursor, "when:", when_len) != 0) return false;
+    cursor += when_len;
+
+    // Advance past spaces.
+    while (*cursor == ' ') cursor++;
+
+    // Try to parse a timestamp. isdigit() is only defined for values representable as unsigned
+    // char (or EOF), so convert first: bytes >= 0x80 are negative where char is signed.
+    if (!isdigit(static_cast<unsigned char>(*cursor))) return false;
+    const long timestamp = strtol(cursor, nullptr, 0);
+    if (timestamp <= 0) return false;
+
+    *out_when = static_cast<time_t>(timestamp);
+    return true;
+}
+
+/// Find the line following the one that begins at \p start, looking at no more than \p length
+/// bytes. The string is not null terminated.
+/// \return a pointer to the start of the next line, or NULL if there is no newline, nothing
+/// follows the newline, or the following line is not itself terminated by a newline.
+static const char *next_line(const char *start, size_t length) {
+    // Handle the hopeless case.
+    if (length < 1) return NULL;
+
+    // One past the last byte we may look at.
+    const char *const limit = start + length;
+
+    // The current line must end within the buffer...
+    const void *const first_newline = std::memchr(start, '\n', length);
+    if (first_newline == NULL) return NULL;
+
+    // ...and something must follow its newline.
+    const char *const candidate = static_cast<const char *>(first_newline) + 1;
+    if (candidate >= limit) return NULL;
+
+    // The following line must itself be "newline terminated".
+    if (std::memchr(candidate, '\n', limit - candidate) == NULL) return NULL;
+
+    return candidate;
+}
+
+/// Support for iteratively locating the offsets of history items.
+/// Pass the file contents and a pointer to a cursor size_t, initially 0.
+/// If cutoff_timestamp is nonzero, skip items whose parsed timestamp is after that cutoff.
+/// Returns (size_t)-1 when done; otherwise the offset of the item, with the cursor advanced.
+static size_t offset_of_next_item_fish_2_0(const history_file_contents_t &contents,
+                                           size_t *inout_cursor, time_t cutoff_timestamp) {
+    size_t cursor = *inout_cursor;
+    size_t result = size_t(-1);
+    const size_t length = contents.length();
+    const char *const begin = contents.begin();
+    const char *const end = contents.end();
+    while (cursor < length) {
+        const char *line_start = contents.address_at(cursor);
+
+        // Advance the cursor to the next line.
+        const char *a_newline = (const char *)std::memchr(line_start, '\n', length - cursor);
+        if (a_newline == NULL) break;
+
+        // Advance the cursor past this line. +1 is for the newline.
+        cursor = a_newline - begin + 1;
+
+        // Skip lines with a leading space, since these are in the interior of one of our items.
+        if (line_start[0] == ' ') continue;
+
+        // Skip very short lines to make one of the checks below easier.
+        if (a_newline - line_start < 3) continue;
+
+        // Try to be a little YAML compatible. Skip lines with leading %, ---, or ...
+        if (!std::memcmp(line_start, "%", 1) || !std::memcmp(line_start, "---", 3) ||
+            !std::memcmp(line_start, "...", 3))
+            continue;
+
+        // Hackish: fish 1.x rewriting a fish 2.0 history file can produce lines with lots of
+        // leading "- cmd: - cmd: - cmd:". Trim all but one leading "- cmd:".
+        const char *double_cmd = "- cmd: - cmd: ";
+        const size_t double_cmd_len = std::strlen(double_cmd);
+        while ((size_t)(a_newline - line_start) > double_cmd_len &&
+               !std::memcmp(line_start, double_cmd, double_cmd_len)) {
+            // Skip over just one of the - cmd. In the end there will be just one left.
+            line_start += std::strlen("- cmd: ");
+        }
+
+        // Hackish: fish 1.x rewriting a fish 2.0 history file can produce commands like "when:
+        // 123456". Ignore those.
+        const char *cmd_when = "- cmd:    when:";
+        const size_t cmd_when_len = std::strlen(cmd_when);
+        if ((size_t)(a_newline - line_start) >= cmd_when_len &&
+            !std::memcmp(line_start, cmd_when, cmd_when_len)) {
+            continue;
+        }
+
+        // At this point, we know line_start is at the beginning of an item. But maybe we want to
+        // skip this item because of timestamps. A 0 cutoff means we don't care; if we do care, then
+        // try parsing out a timestamp.
+        if (cutoff_timestamp != 0) {
+            // Hackish fast way to skip items created after our timestamp. This is the mechanism by
+            // which we avoid "seeing" commands from other sessions that started after we started.
+            // We try hard to ensure that our items are sorted by their timestamps, so in theory we
+            // could just break, but I don't think that works well if (for example) the clock
+            // changes. So we'll read all subsequent items.
+            // Walk over lines that we think are interior. These lines are not null terminated, but
+            // are guaranteed to contain a newline.
+            bool has_timestamp = false;
+            time_t timestamp = 0;
+            const char *interior_line;
+
+            for (interior_line = next_line(line_start, end - line_start);
+                 interior_line != NULL && !has_timestamp;
+                 interior_line = next_line(interior_line, end - interior_line)) {
+                // If the first character is not a space, it's not an interior line, so we're done.
+                if (interior_line[0] != ' ') break;
+
+                // Hackish optimization: since we just stepped over some interior line, update the
+                // cursor so we don't have to look at these lines next time.
+                cursor = interior_line - begin;
+
+                // Try parsing a timestamp from this line. If we succeed, the loop will break.
+                has_timestamp = parse_timestamp(interior_line, &timestamp);
+            }
+
+            // Skip this item if the timestamp is past our cutoff.
+            if (has_timestamp && timestamp > cutoff_timestamp) {
+                continue;
+            }
+        }
+
+        // We made it through the gauntlet.
+        result = line_start - begin;
+        break;  //!OCLINT(avoid branching statement as last in loop)
+    }
+
+    *inout_cursor = cursor;
+    return result;
+}
+
+/// Remove the backslash from every backslash-newline pair, keeping the newline. This makes a
+/// string from the history file better formatted for on screen display.
+/// Scanning stops at the first null character in \p in_str.
+static wcstring history_unescape_newlines_fish_1_x(const wcstring &in_str) {
+    wcstring out;
+    const wchar_t *cur = in_str.c_str();
+    while (*cur != L'\0') {
+        // Only a backslash directly in front of a newline is dropped.
+        const bool escapes_newline = (cur[0] == L'\\' && cur[1] == L'\n');
+        if (!escapes_newline) out.push_back(*cur);
+        ++cur;
+    }
+    return out;
+}
+
+/// Decode an item via the fish 1.x format. Adapted from fish 1.x's item_get().
+/// Reads from \p begin and never looks at or beyond begin + length. If the first character is
+/// '#', the text up to the next newline is parsed as a timestamp line rather than kept as
+/// command text. The command ends at the first newline not preceded by an unescaped backslash,
+/// or at the end of the buffer.
+static history_item_t decode_item_fish_1_x(const char *begin, size_t length) {
+    const char *end = begin + length;
+    const char *pos = begin;
+    wcstring out;
+    bool was_backslash = false;
+    bool first_char = true;
+    bool timestamp_mode = false;
+    time_t timestamp = 0;
+
+    // The data is not null terminated and need not end with a newline, so stop at the end of the
+    // buffer instead of reading past it.
+    while (pos < end) {
+        wchar_t c;
+        size_t res;
+        mbstate_t state = {};
+
+        if (MB_CUR_MAX == 1) {  // single-byte locale
+            c = (unsigned char)*pos;
+            res = 1;
+        } else {
+            res = std::mbrtowc(&c, pos, end - pos, &state);
+        }
+
+        if (res == (size_t)-1) {
+            // Invalid sequence: skip one byte and try again.
+            pos++;
+            continue;
+        } else if (res == (size_t)-2) {
+            // Incomplete sequence at the end of the buffer.
+            break;
+        } else if (res == (size_t)0) {
+            // Null character: skip it.
+            pos++;
+            continue;
+        }
+        pos += res;
+
+        if (c == L'\n') {
+            if (timestamp_mode) {
+                // End of the timestamp line: parse the digits and start over for the command.
+                const wchar_t *time_string = out.c_str();
+                while (*time_string && !iswdigit(*time_string)) time_string++;
+
+                if (*time_string) {
+                    time_t tm = (time_t)fish_wcstol(time_string);
+                    if (!errno && tm >= 0) {
+                        timestamp = tm;
+                    }
+                }
+
+                out.clear();
+                timestamp_mode = false;
+                continue;
+            }
+            // An unescaped newline ends the item.
+            if (!was_backslash) break;
+        }
+
+        if (first_char) {
+            first_char = false;
+            if (c == L'#') timestamp_mode = true;
+        }
+
+        out.push_back(c);
+        // A backslash that is itself escaped does not escape what follows.
+        was_backslash = (c == L'\\') && !was_backslash;
+    }
+
+    out = history_unescape_newlines_fish_1_x(out);
+    return history_item_t(out, timestamp);
+}
+
+/// Same as offset_of_next_item_fish_2_0, but for fish 1.x (pre fishfish).
+/// Adapted from history_populate_from_mmap in history.c
+/// The item starts at *inout_cursor. If it begins with '#', its first newline does not end it.
+/// A backslash skips the character after it, so an escaped newline does not end the item either.
+/// \return the offset of the item (the incoming cursor), or (size_t)-1 once the cursor has
+/// reached the end. On success the cursor is left just after the item and never exceeds
+/// \p mmap_length.
+static size_t offset_of_next_item_fish_1_x(const char *begin, size_t mmap_length,
+                                           size_t *inout_cursor) {
+    if (mmap_length == 0 || *inout_cursor >= mmap_length) return (size_t)-1;
+
+    const size_t result = *inout_cursor;
+    size_t idx = result;
+
+    // Only the item's first character decides whether its first newline is ignored.
+    bool ignore_newline = (begin[idx] == '#');
+
+    while (idx < mmap_length) {
+        const char c = begin[idx++];
+        if (c == '\\') {
+            // Skip the escaped character, but never step beyond the end of the data.
+            if (idx < mmap_length) idx++;
+        } else if (c == '\n') {
+            // idx already points just after this newline.
+            if (!ignore_newline) break;
+            ignore_newline = false;
+        }
+    }
+
+    *inout_cursor = idx;
+    return result;
+}
--- a/src/history_file.h
+++ b/src/history_file.h
@ -0,0 +1,72 @@
+#ifndef FISH_HISTORY_FILE_H
+#define FISH_HISTORY_FILE_H
+
+#include "config.h"
+
+#include "maybe.h"
+
+#include <cassert>
+#include <memory>
+
+#include <sys/mman.h>
+
+// History file types.
+enum history_file_type_t { history_type_fish_2_0, history_type_fish_1_x };
+
+/// history_file_contents_t holds the read-only contents of a history file in mmap()ed memory.
+class history_file_contents_t {
+   public:
+    /// Map or read the whole file behind fd; the fd is not closed. \return nullptr on failure.
+    static std::unique_ptr<history_file_contents_t> create(int fd);
+
+    /// Decode the item that starts at the given offset, according to the file type.
+    history_item_t decode_item(size_t offset) const;
+
+    /// Support for iterating item offsets.
+    /// The cursor should initially be 0.
+    /// If cutoff is nonzero, skip items whose timestamp is newer than cutoff.
+    /// \return the offset of the next item, or none() on end.
+    maybe_t<size_t> offset_of_next_item(size_t *cursor, time_t cutoff);
+
+    /// Get the file type.
+    history_file_type_t type() const { return type_; }
+
+    /// Get the size of the contents, in bytes.
+    size_t length() const { return length_; }
+
+    /// Return a pointer to the beginning.
+    const char *begin() const { return address_at(0); }
+
+    /// Return a pointer to one-past-the-end.
+    const char *end() const { return address_at(length_); }
+
+    /// Access the address at a given offset, which must be at most length().
+    const char *address_at(size_t offset) const {
+        assert(offset <= length_ && "Invalid offset");
+        return start_ + offset;
+    }
+
+    ~history_file_contents_t();
+
+   private:
+    // Start of the mmap()ed region; the destructor releases it with munmap().
+    const char *start_;
+
+    // The mapped length, in bytes.
+    const size_t length_;
+
+    // The type of the mapped file.
+    const history_file_type_t type_;
+
+    // Private constructor; use the static create() function.
+    history_file_contents_t(const char *mmap_start, size_t mmap_length, history_file_type_t type);
+
+    history_file_contents_t(history_file_contents_t &&) = delete;
+    void operator=(history_file_contents_t &&) = delete;
+};
+
+// Support for escaping and unescaping the nonstandard "yaml" format introduced in fish 2.0.
+void escape_yaml_fish_2_0(std::string *str);
+void unescape_yaml_fish_2_0(std::string *str);
+
+#endif