mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-28 05:43:11 +00:00
Migrate history file format stuff into new file history_file.cpp
Breaks up the history.cpp monolith.
This commit is contained in:
parent
74357bac91
commit
91987a4548
4 changed files with 629 additions and 596 deletions
|
@ -94,8 +94,8 @@ SET(FISH_SRCS
|
||||||
src/color.cpp src/common.cpp src/complete.cpp src/env.cpp src/env_dispatch.cpp
|
src/color.cpp src/common.cpp src/complete.cpp src/env.cpp src/env_dispatch.cpp
|
||||||
src/env_universal_common.cpp src/event.cpp src/exec.cpp src/expand.cpp
|
src/env_universal_common.cpp src/event.cpp src/exec.cpp src/expand.cpp
|
||||||
src/fallback.cpp src/fish_version.cpp src/function.cpp src/highlight.cpp
|
src/fallback.cpp src/fish_version.cpp src/function.cpp src/highlight.cpp
|
||||||
src/history.cpp src/input.cpp src/input_common.cpp src/intern.cpp src/io.cpp
|
src/history.cpp src/history_file.cpp src/input.cpp src/input_common.cpp src/intern.cpp
|
||||||
src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp
|
src/io.cpp src/iothread.cpp src/kill.cpp src/output.cpp src/pager.cpp
|
||||||
src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp
|
src/parse_execution.cpp src/parse_productions.cpp src/parse_tree.cpp
|
||||||
src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp
|
src/parse_util.cpp src/parser.cpp src/parser_keywords.cpp src/path.cpp
|
||||||
src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp
|
src/postfork.cpp src/proc.cpp src/reader.cpp src/sanity.cpp src/screen.cpp
|
||||||
|
|
603
src/history.cpp
603
src/history.cpp
|
@ -12,7 +12,6 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
// We need the sys/file.h for the flock() declaration on Linux but not OS X.
|
// We need the sys/file.h for the flock() declaration on Linux but not OS X.
|
||||||
#include <sys/file.h> // IWYU pragma: keep
|
#include <sys/file.h> // IWYU pragma: keep
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
@ -34,6 +33,7 @@
|
||||||
#include "flog.h"
|
#include "flog.h"
|
||||||
#include "global_safety.h"
|
#include "global_safety.h"
|
||||||
#include "history.h"
|
#include "history.h"
|
||||||
|
#include "history_file.h"
|
||||||
#include "io.h"
|
#include "io.h"
|
||||||
#include "iothread.h"
|
#include "iothread.h"
|
||||||
#include "lru.h"
|
#include "lru.h"
|
||||||
|
@ -159,22 +159,6 @@ static bool history_file_lock(int fd, int lock_type) {
|
||||||
return retval != -1;
|
return retval != -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The on-disk formats a history file may be stored in.
enum history_file_type_t {
    // The current format, whose items begin with a "- cmd:" line.
    history_type_fish_2_0,
    // The legacy format; such files start with a '#'.
    history_type_fish_1_x
};
|
|
||||||
|
|
||||||
/// Try to infer the history file type based on inspecting the data.
|
|
||||||
static maybe_t<history_file_type_t> infer_file_type(const void *data, size_t len) {
|
|
||||||
maybe_t<history_file_type_t> result{};
|
|
||||||
if (len > 0) { // old fish started with a #
|
|
||||||
if (static_cast<const char *>(data)[0] == '#') {
|
|
||||||
result = history_type_fish_1_x;
|
|
||||||
} else { // assume new fish
|
|
||||||
result = history_type_fish_2_0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Our LRU cache is used for restricting the amount of history we have, and limiting how long we
|
/// Our LRU cache is used for restricting the amount of history we have, and limiting how long we
|
||||||
/// order it.
|
/// order it.
|
||||||
class history_lru_item_t {
|
class history_lru_item_t {
|
||||||
|
@ -214,325 +198,8 @@ class history_lru_cache_t : public lru_cache_t<history_lru_cache_t, history_lru_
|
||||||
|
|
||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
// history_file_contents_t holds the read-only contents of a file.
|
|
||||||
class history_file_contents_t {
|
|
||||||
// The memory mapped pointer.
|
|
||||||
void *start_;
|
|
||||||
|
|
||||||
// The mapped length.
|
|
||||||
size_t length_;
|
|
||||||
|
|
||||||
// The type of the mapped file.
|
|
||||||
history_file_type_t type_;
|
|
||||||
|
|
||||||
// Private constructor; use the static create() function.
|
|
||||||
history_file_contents_t(void *mmap_start, size_t mmap_length, history_file_type_t type)
|
|
||||||
: start_(mmap_start), length_(mmap_length), type_(type) {
|
|
||||||
assert(mmap_start != MAP_FAILED && "Invalid mmap address");
|
|
||||||
}
|
|
||||||
|
|
||||||
history_file_contents_t(history_file_contents_t &&) = delete;
|
|
||||||
void operator=(history_file_contents_t &&) = delete;
|
|
||||||
|
|
||||||
// Check if we should mmap the fd.
|
|
||||||
// Don't try mmap() on non-local filesystems.
|
|
||||||
static bool should_mmap(int fd) {
|
|
||||||
if (history_t::never_mmap) return false;
|
|
||||||
|
|
||||||
// mmap only if we are known not-remote (return is 0).
|
|
||||||
int ret = fd_check_is_remote(fd);
|
|
||||||
return ret == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read up to len bytes from fd into address, zeroing the rest.
|
|
||||||
// Return true on success, false on failure.
|
|
||||||
static bool read_from_fd(int fd, void *address, size_t len) {
|
|
||||||
size_t remaining = len;
|
|
||||||
char *ptr = static_cast<char *>(address);
|
|
||||||
while (remaining > 0) {
|
|
||||||
ssize_t amt = read(fd, ptr, remaining);
|
|
||||||
if (amt < 0) {
|
|
||||||
if (errno != EINTR) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
} else if (amt == 0) {
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
remaining -= amt;
|
|
||||||
ptr += amt;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
std::memset(ptr, 0, remaining);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
// Access the address at a given offset.
|
|
||||||
const char *address_at(size_t offset) const {
|
|
||||||
assert(offset <= length_ && "Invalid offset");
|
|
||||||
auto base = static_cast<const char *>(start_);
|
|
||||||
return base + offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return a pointer to the beginning.
|
|
||||||
const char *begin() const { return address_at(0); }
|
|
||||||
|
|
||||||
// Return a pointer to one-past-the-end.
|
|
||||||
const char *end() const { return address_at(length_); }
|
|
||||||
|
|
||||||
// Get the size of the contents.
|
|
||||||
size_t length() const { return length_; }
|
|
||||||
|
|
||||||
// Get the file type.
|
|
||||||
history_file_type_t type() const { return type_; }
|
|
||||||
|
|
||||||
~history_file_contents_t() { munmap(start_, length_); }
|
|
||||||
|
|
||||||
// Construct a history file contents from a file descriptor. The file descriptor is not closed.
|
|
||||||
static std::unique_ptr<history_file_contents_t> create(int fd) {
|
|
||||||
// Check that the file is seekable, and its size.
|
|
||||||
off_t len = lseek(fd, 0, SEEK_END);
|
|
||||||
if (len <= 0 || static_cast<unsigned long>(len) >= SIZE_MAX) return nullptr;
|
|
||||||
if (lseek(fd, 0, SEEK_SET) != 0) return nullptr;
|
|
||||||
|
|
||||||
// Read the file, possibly ussing mmap.
|
|
||||||
void *mmap_start = nullptr;
|
|
||||||
if (should_mmap(fd)) {
|
|
||||||
// We feel confident to map the file directly. Note this is still risky: if another
|
|
||||||
// process truncates the file we risk SIGBUS.
|
|
||||||
mmap_start = mmap(0, size_t(len), PROT_READ, MAP_PRIVATE, fd, 0);
|
|
||||||
if (mmap_start == MAP_FAILED) return nullptr;
|
|
||||||
} else {
|
|
||||||
// We don't want to map the file. mmap some private memory and then read into it. We use
|
|
||||||
// mmap instead of malloc so that the destructor can always munmap().
|
|
||||||
mmap_start =
|
|
||||||
#ifdef MAP_ANON
|
|
||||||
mmap(0, size_t(len), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
|
|
||||||
#else
|
|
||||||
mmap(0, size_t(len), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
||||||
#endif
|
|
||||||
if (mmap_start == MAP_FAILED) return nullptr;
|
|
||||||
if (!read_from_fd(fd, mmap_start, len)) return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check the file type.
|
|
||||||
auto mtype = infer_file_type(mmap_start, len);
|
|
||||||
if (!mtype) return nullptr;
|
|
||||||
|
|
||||||
return std::unique_ptr<history_file_contents_t>(
|
|
||||||
new history_file_contents_t(mmap_start, len, *mtype));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
static wcstring history_filename(const wcstring &name, const wcstring &suffix);
|
static wcstring history_filename(const wcstring &name, const wcstring &suffix);
|
||||||
|
|
||||||
/// Replaces newlines with a literal backslash followed by an n, and replaces backslashes with two
|
|
||||||
/// backslashes.
|
|
||||||
static void escape_yaml(std::string *str);
|
|
||||||
|
|
||||||
/// Inverse of escape_yaml.
|
|
||||||
static void unescape_yaml(std::string *str);
|
|
||||||
|
|
||||||
/// Read one line, stripping off any newline, and updating cursor. Note that our input string is NOT
|
|
||||||
/// null terminated; it's just a memory mapped file.
|
|
||||||
static size_t read_line(const char *base, size_t cursor, size_t len, std::string &result) {
|
|
||||||
// Locate the newline.
|
|
||||||
assert(cursor <= len);
|
|
||||||
const char *start = base + cursor;
|
|
||||||
const char *a_newline = (char *)std::memchr(start, '\n', len - cursor);
|
|
||||||
if (a_newline != NULL) { // we found a newline
|
|
||||||
result.assign(start, a_newline - start);
|
|
||||||
// Return the amount to advance the cursor; skip over the newline.
|
|
||||||
return a_newline - start + 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// We ran off the end.
|
|
||||||
result.clear();
|
|
||||||
return len - cursor;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Strip every leading ' ' character from \p str in place (tabs and other whitespace are kept).
/// \return the number of spaces that were removed.
static size_t trim_leading_spaces(std::string &str) {
    size_t first_kept = str.find_first_not_of(' ');
    // A string made up only of spaces (or an empty one) is trimmed entirely.
    if (first_kept == std::string::npos) first_kept = str.size();
    str.erase(0, first_kept);
    return first_kept;
}
|
|
||||||
|
|
||||||
static bool extract_prefix_and_unescape_yaml(std::string *key, std::string *value,
|
|
||||||
const std::string &line) {
|
|
||||||
size_t where = line.find(':');
|
|
||||||
if (where != std::string::npos) {
|
|
||||||
key->assign(line, 0, where);
|
|
||||||
|
|
||||||
// Skip a space after the : if necessary.
|
|
||||||
size_t val_start = where + 1;
|
|
||||||
if (val_start < line.size() && line.at(val_start) == ' ') val_start++;
|
|
||||||
value->assign(line, val_start, line.size() - val_start);
|
|
||||||
|
|
||||||
unescape_yaml(key);
|
|
||||||
unescape_yaml(value);
|
|
||||||
}
|
|
||||||
return where != std::string::npos;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Remove backslashes from all newlines. This makes a string from the history file better formated
|
|
||||||
/// for on screen display.
|
|
||||||
static wcstring history_unescape_newlines_fish_1_x(const wcstring &in_str) {
|
|
||||||
wcstring out;
|
|
||||||
for (const wchar_t *in = in_str.c_str(); *in; in++) {
|
|
||||||
if (*in == L'\\') {
|
|
||||||
if (*(in + 1) != L'\n') {
|
|
||||||
out.push_back(*in);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
out.push_back(*in);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return out;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Decode an item via the fish 1.x format. Adapted from fish 1.x's item_get().
|
|
||||||
static history_item_t decode_item_fish_1_x(const char *begin, size_t length) {
|
|
||||||
const char *end = begin + length;
|
|
||||||
const char *pos = begin;
|
|
||||||
wcstring out;
|
|
||||||
bool was_backslash = false;
|
|
||||||
bool first_char = true;
|
|
||||||
bool timestamp_mode = false;
|
|
||||||
time_t timestamp = 0;
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
wchar_t c;
|
|
||||||
size_t res;
|
|
||||||
mbstate_t state = {};
|
|
||||||
|
|
||||||
if (MB_CUR_MAX == 1) { // single-byte locale
|
|
||||||
c = (unsigned char)*pos;
|
|
||||||
res = 1;
|
|
||||||
} else {
|
|
||||||
res = std::mbrtowc(&c, pos, end - pos, &state);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (res == (size_t)-1) {
|
|
||||||
pos++;
|
|
||||||
continue;
|
|
||||||
} else if (res == (size_t)-2) {
|
|
||||||
break;
|
|
||||||
} else if (res == (size_t)0) {
|
|
||||||
pos++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
pos += res;
|
|
||||||
|
|
||||||
if (c == L'\n') {
|
|
||||||
if (timestamp_mode) {
|
|
||||||
const wchar_t *time_string = out.c_str();
|
|
||||||
while (*time_string && !iswdigit(*time_string)) time_string++;
|
|
||||||
|
|
||||||
if (*time_string) {
|
|
||||||
time_t tm = (time_t)fish_wcstol(time_string);
|
|
||||||
if (!errno && tm >= 0) {
|
|
||||||
timestamp = tm;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
out.clear();
|
|
||||||
timestamp_mode = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (!was_backslash) break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (first_char) {
|
|
||||||
first_char = false;
|
|
||||||
if (c == L'#') timestamp_mode = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
out.push_back(c);
|
|
||||||
was_backslash = (c == L'\\') && !was_backslash;
|
|
||||||
}
|
|
||||||
|
|
||||||
out = history_unescape_newlines_fish_1_x(out);
|
|
||||||
return history_item_t(out, timestamp);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Decode an item via the fish 2.0 format.
|
|
||||||
static history_item_t decode_item_fish_2_0(const char *base, size_t len) {
|
|
||||||
wcstring cmd;
|
|
||||||
time_t when = 0;
|
|
||||||
path_list_t paths;
|
|
||||||
|
|
||||||
size_t indent = 0, cursor = 0;
|
|
||||||
std::string key, value, line;
|
|
||||||
|
|
||||||
// Read the "- cmd:" line.
|
|
||||||
size_t advance = read_line(base, cursor, len, line);
|
|
||||||
trim_leading_spaces(line);
|
|
||||||
if (!extract_prefix_and_unescape_yaml(&key, &value, line) || key != "- cmd") {
|
|
||||||
goto done; //!OCLINT(goto is the cleanest way to handle bad input)
|
|
||||||
}
|
|
||||||
|
|
||||||
cursor += advance;
|
|
||||||
cmd = str2wcstring(value);
|
|
||||||
|
|
||||||
// Read the remaining lines.
|
|
||||||
for (;;) {
|
|
||||||
size_t advance = read_line(base, cursor, len, line);
|
|
||||||
|
|
||||||
size_t this_indent = trim_leading_spaces(line);
|
|
||||||
if (indent == 0) indent = this_indent;
|
|
||||||
|
|
||||||
if (this_indent == 0 || indent != this_indent) break;
|
|
||||||
|
|
||||||
if (!extract_prefix_and_unescape_yaml(&key, &value, line)) break;
|
|
||||||
|
|
||||||
// We are definitely going to consume this line.
|
|
||||||
cursor += advance;
|
|
||||||
|
|
||||||
if (key == "when") {
|
|
||||||
// Parse an int from the timestamp. Should this fail, strtol returns 0; that's
|
|
||||||
// acceptable.
|
|
||||||
char *end = NULL;
|
|
||||||
long tmp = strtol(value.c_str(), &end, 0);
|
|
||||||
when = tmp;
|
|
||||||
} else if (key == "paths") {
|
|
||||||
// Read lines starting with " - " until we can't read any more.
|
|
||||||
for (;;) {
|
|
||||||
size_t advance = read_line(base, cursor, len, line);
|
|
||||||
if (trim_leading_spaces(line) <= indent) break;
|
|
||||||
|
|
||||||
if (std::strncmp(line.c_str(), "- ", 2)) break;
|
|
||||||
|
|
||||||
// We're going to consume this line.
|
|
||||||
cursor += advance;
|
|
||||||
|
|
||||||
// Skip the leading dash-space and then store this path it.
|
|
||||||
line.erase(0, 2);
|
|
||||||
unescape_yaml(&line);
|
|
||||||
paths.push_back(str2wcstring(line));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
done:
|
|
||||||
history_item_t result(cmd, when);
|
|
||||||
result.set_required_paths(paths);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Decode the item that starts at \p offset within \p contents, using the decoder that matches the
/// file's type. Returns an empty item if the type is not one of the known formats.
static history_item_t decode_item(const history_file_contents_t &contents, size_t offset) {
    const char *const item_start = contents.address_at(offset);
    // Everything from the offset to the end of the file is available to the decoder.
    const size_t bytes_left = contents.length() - offset;

    const history_file_type_t format = contents.type();
    if (format == history_type_fish_2_0) return decode_item_fish_2_0(item_start, bytes_left);
    if (format == history_type_fish_1_x) return decode_item_fish_1_x(item_start, bytes_left);
    return history_item_t(L"");
}
|
|
||||||
|
|
||||||
/// We can merge two items if they are the same command. We use the more recent timestamp, more
|
/// We can merge two items if they are the same command. We use the more recent timestamp, more
|
||||||
/// recent identifier, and the longer list of required paths.
|
/// recent identifier, and the longer list of required paths.
|
||||||
bool history_item_t::merge(const history_item_t &item) {
|
bool history_item_t::merge(const history_item_t &item) {
|
||||||
|
@ -592,7 +259,7 @@ static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp,
|
||||||
const path_list_t &required_paths,
|
const path_list_t &required_paths,
|
||||||
history_output_buffer_t *buffer) {
|
history_output_buffer_t *buffer) {
|
||||||
std::string cmd = wcs2string(wcmd);
|
std::string cmd = wcs2string(wcmd);
|
||||||
escape_yaml(&cmd);
|
escape_yaml_fish_2_0(&cmd);
|
||||||
buffer->append("- cmd: ", cmd.c_str(), "\n");
|
buffer->append("- cmd: ", cmd.c_str(), "\n");
|
||||||
buffer->append(" when: ", std::to_string(timestamp).c_str(), "\n");
|
buffer->append(" when: ", std::to_string(timestamp).c_str(), "\n");
|
||||||
|
|
||||||
|
@ -601,206 +268,12 @@ static void append_yaml_to_buffer(const wcstring &wcmd, time_t timestamp,
|
||||||
|
|
||||||
for (auto const &wpath : required_paths) {
|
for (auto const &wpath : required_paths) {
|
||||||
std::string path = wcs2string(wpath);
|
std::string path = wcs2string(wpath);
|
||||||
escape_yaml(&path);
|
escape_yaml_fish_2_0(&path);
|
||||||
buffer->append(" - ", path.c_str(), "\n");
|
buffer->append(" - ", path.c_str(), "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a timestamp line that looks like this: spaces, "when:", spaces, timestamp, newline
/// The string is NOT null terminated; however we do know it contains a newline, so stop when we
/// reach it.
/// \param str the start of the line.
/// \param out_when receives the timestamp on success; left untouched otherwise.
/// \return true if the line is a "when:" line carrying a timestamp greater than zero.
static bool parse_timestamp(const char *str, time_t *out_when) {
    const char *cursor = str;
    // Advance past spaces.
    while (*cursor == ' ') cursor++;

    // Look for "when:".
    const size_t when_len = 5;
    if (std::strncmp(cursor, "when:", when_len) != 0) return false;
    cursor += when_len;

    // Advance past spaces.
    while (*cursor == ' ') cursor++;

    // Try to parse a timestamp. The byte is converted to unsigned char first, because passing a
    // negative char (any byte >= 0x80 where char is signed) to isdigit() is undefined behavior.
    long timestamp = 0;
    if (isdigit(static_cast<unsigned char>(*cursor)) &&
        (timestamp = strtol(cursor, NULL, 0)) > 0) {
        *out_when = static_cast<time_t>(timestamp);
        return true;
    }
    return false;
}
|
|
||||||
|
|
||||||
/// Find the line following the one that begins at \p start, looking at no more than \p length
/// bytes. The data is not null terminated.
/// \return a pointer to the start of the next line, or NULL if there is no next line or if that
/// line is not itself terminated by a newline.
static const char *next_line(const char *start, size_t length) {
    // Nothing to search.
    if (length == 0) return nullptr;

    // One past the last byte we may look at.
    const char *const limit = start + length;

    // The next line begins right after the first newline, provided anything follows it.
    const void *const first_newline = std::memchr(start, '\n', length);
    if (first_newline == nullptr) return nullptr;
    const char *const candidate = static_cast<const char *>(first_newline) + 1;
    if (candidate >= limit) return nullptr;

    // Only report the line if it is itself "newline terminated".
    const bool terminated = std::memchr(candidate, '\n', limit - candidate) != nullptr;
    return terminated ? candidate : nullptr;
}
|
|
||||||
|
|
||||||
/// Support for iteratively locating the offsets of history items.
|
|
||||||
/// Pass the file contents and a pointer to a cursor size_t, initially 0.
|
|
||||||
/// If custoff_timestamp is nonzero, skip items created at or after that timestamp.
|
|
||||||
/// Returns (size_t)-1 when done.
|
|
||||||
static size_t offset_of_next_item_fish_2_0(const history_file_contents_t &contents,
|
|
||||||
size_t *inout_cursor, time_t cutoff_timestamp) {
|
|
||||||
size_t cursor = *inout_cursor;
|
|
||||||
size_t result = size_t(-1);
|
|
||||||
const size_t length = contents.length();
|
|
||||||
const char *const begin = contents.begin();
|
|
||||||
const char *const end = contents.end();
|
|
||||||
while (cursor < length) {
|
|
||||||
const char *line_start = contents.address_at(cursor);
|
|
||||||
|
|
||||||
// Advance the cursor to the next line.
|
|
||||||
const char *a_newline = (const char *)std::memchr(line_start, '\n', length - cursor);
|
|
||||||
if (a_newline == NULL) break;
|
|
||||||
|
|
||||||
// Advance the cursor past this line. +1 is for the newline.
|
|
||||||
cursor = a_newline - begin + 1;
|
|
||||||
|
|
||||||
// Skip lines with a leading space, since these are in the interior of one of our items.
|
|
||||||
if (line_start[0] == ' ') continue;
|
|
||||||
|
|
||||||
// Skip very short lines to make one of the checks below easier.
|
|
||||||
if (a_newline - line_start < 3) continue;
|
|
||||||
|
|
||||||
// Try to be a little YAML compatible. Skip lines with leading %, ---, or ...
|
|
||||||
if (!std::memcmp(line_start, "%", 1) || !std::memcmp(line_start, "---", 3) ||
|
|
||||||
!std::memcmp(line_start, "...", 3))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Hackish: fish 1.x rewriting a fish 2.0 history file can produce lines with lots of
|
|
||||||
// leading "- cmd: - cmd: - cmd:". Trim all but one leading "- cmd:".
|
|
||||||
const char *double_cmd = "- cmd: - cmd: ";
|
|
||||||
const size_t double_cmd_len = std::strlen(double_cmd);
|
|
||||||
while ((size_t)(a_newline - line_start) > double_cmd_len &&
|
|
||||||
!std::memcmp(line_start, double_cmd, double_cmd_len)) {
|
|
||||||
// Skip over just one of the - cmd. In the end there will be just one left.
|
|
||||||
line_start += std::strlen("- cmd: ");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hackish: fish 1.x rewriting a fish 2.0 history file can produce commands like "when:
|
|
||||||
// 123456". Ignore those.
|
|
||||||
const char *cmd_when = "- cmd: when:";
|
|
||||||
const size_t cmd_when_len = std::strlen(cmd_when);
|
|
||||||
if ((size_t)(a_newline - line_start) >= cmd_when_len &&
|
|
||||||
!std::memcmp(line_start, cmd_when, cmd_when_len)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// At this point, we know line_start is at the beginning of an item. But maybe we want to
|
|
||||||
// skip this item because of timestamps. A 0 cutoff means we don't care; if we do care, then
|
|
||||||
// try parsing out a timestamp.
|
|
||||||
if (cutoff_timestamp != 0) {
|
|
||||||
// Hackish fast way to skip items created after our timestamp. This is the mechanism by
|
|
||||||
// which we avoid "seeing" commands from other sessions that started after we started.
|
|
||||||
// We try hard to ensure that our items are sorted by their timestamps, so in theory we
|
|
||||||
// could just break, but I don't think that works well if (for example) the clock
|
|
||||||
// changes. So we'll read all subsequent items.
|
|
||||||
// Walk over lines that we think are interior. These lines are not null terminated, but
|
|
||||||
// are guaranteed to contain a newline.
|
|
||||||
bool has_timestamp = false;
|
|
||||||
time_t timestamp = 0;
|
|
||||||
const char *interior_line;
|
|
||||||
|
|
||||||
for (interior_line = next_line(line_start, end - line_start);
|
|
||||||
interior_line != NULL && !has_timestamp;
|
|
||||||
interior_line = next_line(interior_line, end - interior_line)) {
|
|
||||||
// If the first character is not a space, it's not an interior line, so we're done.
|
|
||||||
if (interior_line[0] != ' ') break;
|
|
||||||
|
|
||||||
// Hackish optimization: since we just stepped over some interior line, update the
|
|
||||||
// cursor so we don't have to look at these lines next time.
|
|
||||||
cursor = interior_line - begin;
|
|
||||||
|
|
||||||
// Try parsing a timestamp from this line. If we succeed, the loop will break.
|
|
||||||
has_timestamp = parse_timestamp(interior_line, ×tamp);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip this item if the timestamp is past our cutoff.
|
|
||||||
if (has_timestamp && timestamp > cutoff_timestamp) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// We made it through the gauntlet.
|
|
||||||
result = line_start - begin;
|
|
||||||
break; //!OCLINT(avoid branching statement as last in loop)
|
|
||||||
}
|
|
||||||
|
|
||||||
*inout_cursor = cursor;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Same as offset_of_next_item_fish_2_0, but for fish 1.x (pre fishfish).
/// Adapted from history_populate_from_mmap in history.c
///
/// \param begin the start of the file data, which is not null terminated.
/// \param mmap_length the number of bytes at \p begin.
/// \param inout_cursor on entry, the offset at which the next item starts; on return, the offset
/// just past that item (never more than \p mmap_length).
/// \return the offset of the item, i.e. the cursor's value on entry, or (size_t)-1 when done.
static size_t offset_of_next_item_fish_1_x(const char *begin, size_t mmap_length,
                                           size_t *inout_cursor) {
    if (mmap_length == 0 || *inout_cursor >= mmap_length) return static_cast<size_t>(-1);

    const size_t result = *inout_cursor;

    // If the item opens with a '#', its first line is not the command, so the first newline does
    // not end the item.
    bool ignore_newline = (begin[result] == '#');
    bool all_done = false;

    // Scan by index rather than by pointer so that skipping an escaped character at the very end
    // of the data never forms a pointer beyond one-past-the-end.
    size_t idx = result;
    for (; idx < mmap_length && !all_done; idx++) {
        const char c = begin[idx];
        if (c == '\\') {
            // Skip the escaped character (in particular an escaped newline).
            idx++;
        } else if (c == '\n') {
            if (!ignore_newline) {
                // idx will be left just after this newline, because of the ++ in the loop.
                all_done = true;
            }
            ignore_newline = false;
        }
    }

    // A trailing backslash steps one byte too far; keep the cursor within the data.
    if (idx > mmap_length) idx = mmap_length;

    *inout_cursor = idx;
    return result;
}
|
|
||||||
|
|
||||||
/// Returns the offset of the next item based on the given history type, or -1.
|
|
||||||
static size_t offset_of_next_item(const history_file_contents_t &contents, size_t *inout_cursor,
|
|
||||||
time_t cutoff_timestamp) {
|
|
||||||
switch (contents.type()) {
|
|
||||||
case history_type_fish_2_0:
|
|
||||||
return offset_of_next_item_fish_2_0(contents, inout_cursor, cutoff_timestamp);
|
|
||||||
case history_type_fish_1_x:
|
|
||||||
return offset_of_next_item_fish_1_x(contents.begin(), contents.length(), inout_cursor);
|
|
||||||
}
|
|
||||||
return size_t(-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct history_impl_t {
|
struct history_impl_t {
|
||||||
// Privately add an item. If pending, the item will not be returned by history searches until a
|
// Privately add an item. If pending, the item will not be returned by history searches until a
|
||||||
// call to resolve_pending.
|
// call to resolve_pending.
|
||||||
|
@ -857,9 +330,6 @@ struct history_impl_t {
|
||||||
// Loads old items if necessary.
|
// Loads old items if necessary.
|
||||||
void load_old_if_needed();
|
void load_old_if_needed();
|
||||||
|
|
||||||
// Reads the history file if necessary.
|
|
||||||
bool mmap_if_needed();
|
|
||||||
|
|
||||||
// Deletes duplicates in new_items.
|
// Deletes duplicates in new_items.
|
||||||
void compact_new_items();
|
void compact_new_items();
|
||||||
|
|
||||||
|
@ -1062,7 +532,7 @@ void history_impl_t::get_history(wcstring_list_t &result) {
|
||||||
load_old_if_needed();
|
load_old_if_needed();
|
||||||
for (auto iter = old_item_offsets.crbegin(); iter != old_item_offsets.crend(); ++iter) {
|
for (auto iter = old_item_offsets.crbegin(); iter != old_item_offsets.crend(); ++iter) {
|
||||||
size_t offset = *iter;
|
size_t offset = *iter;
|
||||||
const history_item_t item = decode_item(*file_contents, offset);
|
const history_item_t item = file_contents->decode_item(offset);
|
||||||
if (seen.insert(item.str()).second) result.push_back(item.str());
|
if (seen.insert(item.str()).second) result.push_back(item.str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1099,7 +569,7 @@ history_item_t history_impl_t::item_at_index(size_t idx) {
|
||||||
if (idx < old_item_count) {
|
if (idx < old_item_count) {
|
||||||
// idx == 0 corresponds to last item in old_item_offsets.
|
// idx == 0 corresponds to last item in old_item_offsets.
|
||||||
size_t offset = old_item_offsets.at(old_item_count - idx - 1);
|
size_t offset = old_item_offsets.at(old_item_count - idx - 1);
|
||||||
return decode_item(*file_contents, offset);
|
return file_contents->decode_item(offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index past the valid range, so return an empty history item.
|
// Index past the valid range, so return an empty history item.
|
||||||
|
@ -1129,13 +599,9 @@ void history_impl_t::populate_from_file_contents() {
|
||||||
old_item_offsets.clear();
|
old_item_offsets.clear();
|
||||||
if (file_contents) {
|
if (file_contents) {
|
||||||
size_t cursor = 0;
|
size_t cursor = 0;
|
||||||
for (;;) {
|
while (auto offset = file_contents->offset_of_next_item(&cursor, boundary_timestamp)) {
|
||||||
size_t offset = offset_of_next_item(*file_contents, &cursor, boundary_timestamp);
|
|
||||||
// If we get back -1, we're done.
|
|
||||||
if (offset == size_t(-1)) break;
|
|
||||||
|
|
||||||
// Remember this item.
|
// Remember this item.
|
||||||
old_item_offsets.push_back(offset);
|
old_item_offsets.push_back(*offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1219,52 +685,6 @@ wcstring history_search_t::current_string() const {
|
||||||
return item.str();
|
return item.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Replace every occurrence of \p needle in \p str with \p replacement, in place. Text introduced
/// by a replacement is not searched again. An empty \p needle leaves \p str unchanged.
static void replace_all(std::string *str, const char *needle, const char *replacement) {
    const size_t needle_len = std::strlen(needle);
    const size_t replacement_len = std::strlen(replacement);

    // An empty needle matches at every position, so the loop below would never finish.
    if (needle_len == 0) return;

    size_t offset = 0;
    while ((offset = str->find(needle, offset, needle_len)) != std::string::npos) {
        str->replace(offset, needle_len, replacement, replacement_len);
        // Resume after the inserted text so that it is not matched again.
        offset += replacement_len;
    }
}
|
|
||||||
|
|
||||||
/// Escape \p str in place: each backslash becomes two backslashes, and each newline becomes a
/// backslash followed by a literal n.
static void escape_yaml(std::string *str) {
    // Applied in order: backslashes must be doubled first, otherwise the backslash introduced for
    // a newline would itself get doubled.
    static const char *const substitutions[][2] = {
        {"\\", "\\\\"},  // replace one backslash with two
        {"\n", "\\n"},   // replace newline with backslash + literal n
    };
    for (const auto &sub : substitutions) {
        replace_all(str, sub[0], sub[1]);
    }
}
|
|
||||||
|
|
||||||
/// Undo escape_yaml in place: a doubled backslash collapses to one backslash, and a backslash
/// followed by 'n' becomes a newline. A backslash followed by anything else, or one that is the
/// last character, is left as it is.
/// This function is called frequently, so it ought to be fast.
static void unescape_yaml(std::string *str) {
    std::string &text = *str;
    size_t scan_from = 0;
    for (;;) {
        if (scan_from >= text.size()) return;

        // Look for a backslash; one that is the very last character escapes nothing.
        const size_t slash_pos = text.find('\\', scan_from);
        if (slash_pos == std::string::npos || slash_pos + 1 >= text.size()) return;

        switch (text[slash_pos + 1]) {
            case '\\':
                // Two backslashes in a row. Delete the second one.
                text.erase(slash_pos + 1, 1);
                break;
            case 'n':
                // Backslash + n. Replace with a newline.
                text.replace(slash_pos, 2, "\n");
                break;
            default:
                // Not an escape we produce; leave it alone.
                break;
        }

        // The character at slash_pos has now been made whole; continue with the next one.
        scan_from = slash_pos + 1;
    }
}
|
|
||||||
|
|
||||||
static wcstring history_filename(const wcstring &session_id, const wcstring &suffix) {
|
static wcstring history_filename(const wcstring &session_id, const wcstring &suffix) {
|
||||||
if (session_id.empty()) return L"";
|
if (session_id.empty()) return L"";
|
||||||
|
|
||||||
|
@ -1321,16 +741,11 @@ bool history_impl_t::rewrite_to_temporary_file(int existing_fd, int dst_fd) cons
|
||||||
// old file contents).
|
// old file contents).
|
||||||
if (auto local_file = history_file_contents_t::create(existing_fd)) {
|
if (auto local_file = history_file_contents_t::create(existing_fd)) {
|
||||||
size_t cursor = 0;
|
size_t cursor = 0;
|
||||||
for (;;) {
|
while (auto offset = local_file->offset_of_next_item(&cursor, 0)) {
|
||||||
size_t offset = offset_of_next_item(*local_file, &cursor, 0);
|
|
||||||
// If we get back -1, we're done.
|
|
||||||
if (offset == (size_t)-1) break;
|
|
||||||
|
|
||||||
// Try decoding an old item.
|
// Try decoding an old item.
|
||||||
const history_item_t old_item = decode_item(*local_file, offset);
|
const history_item_t old_item = local_file->decode_item(*offset);
|
||||||
|
|
||||||
if (old_item.empty() || deleted_items.count(old_item.str()) > 0) {
|
if (old_item.empty() || deleted_items.count(old_item.str()) > 0) {
|
||||||
// FLOGF(error, L"Item is deleted : %s\n", old_item.str().c_str());
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Add this old item.
|
// Add this old item.
|
||||||
|
|
546
src/history_file.cpp
Normal file
546
src/history_file.cpp
Normal file
|
@ -0,0 +1,546 @@
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "history.h"
|
||||||
|
#include "history_file.h"
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
|
// Some forward declarations.
|
||||||
|
static history_item_t decode_item_fish_2_0(const char *base, size_t len);
|
||||||
|
static history_item_t decode_item_fish_1_x(const char *begin, size_t length);
|
||||||
|
|
||||||
|
static size_t offset_of_next_item_fish_2_0(const history_file_contents_t &contents,
|
||||||
|
size_t *inout_cursor, time_t cutoff_timestamp);
|
||||||
|
static size_t offset_of_next_item_fish_1_x(const char *begin, size_t mmap_length,
|
||||||
|
size_t *inout_cursor);
|
||||||
|
|
||||||
|
// Check if we should mmap the fd.
|
||||||
|
// Don't try mmap() on non-local filesystems.
|
||||||
|
static bool should_mmap(int fd) {
|
||||||
|
if (history_t::never_mmap) return false;
|
||||||
|
|
||||||
|
// mmap only if we are known not-remote (return is 0).
|
||||||
|
int ret = fd_check_is_remote(fd);
|
||||||
|
return ret == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fill the len-byte buffer at address from fd. Reading stops early at end-of-file, and whatever
// part of the buffer was not filled is zeroed.
// Returns true on success, false if read() fails with anything other than EINTR.
static bool read_from_fd(int fd, void *address, size_t len) {
    char *const dest = static_cast<char *>(address);
    size_t filled = 0;
    while (filled < len) {
        const ssize_t got = read(fd, dest + filled, len - filled);
        if (got == 0) break;  // end of file
        if (got > 0) {
            filled += static_cast<size_t>(got);
            continue;
        }
        // got < 0: retry if interrupted, otherwise give up.
        if (errno != EINTR) return false;
    }
    std::memset(dest + filled, 0, len - filled);
    return true;
}
|
||||||
|
|
||||||
|
/// Try to infer the history file type based on inspecting the data.
|
||||||
|
static maybe_t<history_file_type_t> infer_file_type(const void *data, size_t len) {
|
||||||
|
maybe_t<history_file_type_t> result{};
|
||||||
|
if (len > 0) { // old fish started with a #
|
||||||
|
if (static_cast<const char *>(data)[0] == '#') {
|
||||||
|
result = history_type_fish_1_x;
|
||||||
|
} else { // assume new fish
|
||||||
|
result = history_type_fish_2_0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Replace every occurrence of \p needle in \p str with \p replacement, in place.
/// Text inserted by a replacement is never rescanned, so a replacement may contain the needle.
/// An empty needle matches nothing and leaves the string unchanged.
static void replace_all(std::string *str, const char *needle, const char *replacement) {
    const size_t needle_len = std::strlen(needle);
    // An empty needle would match at every offset and grow the string forever.
    if (needle_len == 0) return;
    const size_t replacement_len = std::strlen(replacement);

    size_t offset = 0;
    // Pass the known lengths so the C strings are not re-measured on every iteration.
    while ((offset = str->find(needle, offset, needle_len)) != std::string::npos) {
        str->replace(offset, needle_len, replacement, replacement_len);
        // Resume after the inserted text.
        offset += replacement_len;
    }
}
|
||||||
|
|
||||||
|
/// Escape a string in place for the fish 2.0 history format: every backslash is doubled and
/// every newline becomes backslash + literal n.
void escape_yaml_fish_2_0(std::string *str) {
    struct escape_rule_t {
        const char *from;
        const char *to;
    };
    // Applied in order: backslashes first, so the backslash introduced by the newline rule is
    // not doubled afterwards.
    static const escape_rule_t rules[] = {
        {"\\", "\\\\"},  // one backslash -> two
        {"\n", "\\n"},   // newline -> backslash + literal n
    };
    for (const escape_rule_t &rule : rules) {
        replace_all(str, rule.from, rule.to);
    }
}
|
||||||
|
|
||||||
|
/// Reverse escape_yaml_fish_2_0() in place: a doubled backslash collapses to one backslash and
/// backslash + n becomes a newline. Any other backslash (including a trailing one) is kept.
/// This function is called frequently, so it ought to be fast.
void unescape_yaml_fish_2_0(std::string *str) {
    // Look for the first backslash through a const reference; if there is none, the string is
    // left completely untouched.
    const std::string &input = *str;
    const size_t first_backslash = input.find('\\');
    if (first_backslash == std::string::npos) return;

    // Compact in place. read_pos never falls behind write_pos, so unread input is never
    // overwritten.
    std::string &buf = *str;
    const size_t total = buf.size();
    size_t read_pos = first_backslash;
    size_t write_pos = first_backslash;
    while (read_pos < total) {
        const char ch = buf[read_pos];
        if (ch == '\\' && read_pos + 1 < total) {
            const char next = buf[read_pos + 1];
            if (next == '\\') {
                // Two backslashes in a row become one.
                buf[write_pos++] = '\\';
                read_pos += 2;
                continue;
            }
            if (next == 'n') {
                // Backslash + n becomes a newline.
                buf[write_pos++] = '\n';
                read_pos += 2;
                continue;
            }
        }
        // Ordinary character, unknown escape, or a backslash at the very end: copy as-is.
        buf[write_pos++] = ch;
        read_pos++;
    }
    buf.resize(write_pos);
}
|
||||||
|
|
||||||
|
// Release the mapping created by create(). munmap() takes a non-const pointer, hence the cast.
history_file_contents_t::~history_file_contents_t() {
    char *const mapping = const_cast<char *>(start_);
    munmap(mapping, length_);
}
|
||||||
|
|
||||||
|
// Wrap an existing mapping of mmap_length bytes at mmap_start holding a file of the given type.
// The destructor munmap()s this region.
history_file_contents_t::history_file_contents_t(const char *mmap_start, size_t mmap_length,
                                                 history_file_type_t type)
    : start_(mmap_start),
      length_(mmap_length),
      type_(type) {
    // Callers must have checked the mmap() result before constructing.
    assert(mmap_start != MAP_FAILED && "Invalid mmap address");
}
|
||||||
|
|
||||||
|
std::unique_ptr<history_file_contents_t> history_file_contents_t::create(int fd) {
|
||||||
|
// Check that the file is seekable, and its size.
|
||||||
|
off_t len = lseek(fd, 0, SEEK_END);
|
||||||
|
if (len <= 0 || static_cast<unsigned long>(len) >= SIZE_MAX) return nullptr;
|
||||||
|
if (lseek(fd, 0, SEEK_SET) != 0) return nullptr;
|
||||||
|
|
||||||
|
// Read the file, possibly ussing mmap.
|
||||||
|
void *mmap_start = nullptr;
|
||||||
|
if (should_mmap(fd)) {
|
||||||
|
// We feel confident to map the file directly. Note this is still risky: if another
|
||||||
|
// process truncates the file we risk SIGBUS.
|
||||||
|
mmap_start = mmap(0, size_t(len), PROT_READ, MAP_PRIVATE, fd, 0);
|
||||||
|
if (mmap_start == MAP_FAILED) return nullptr;
|
||||||
|
} else {
|
||||||
|
// We don't want to map the file. mmap some private memory and then read into it. We use
|
||||||
|
// mmap instead of malloc so that the destructor can always munmap().
|
||||||
|
mmap_start =
|
||||||
|
#ifdef MAP_ANON
|
||||||
|
mmap(0, size_t(len), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
|
||||||
|
#else
|
||||||
|
mmap(0, size_t(len), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
#endif
|
||||||
|
if (mmap_start == MAP_FAILED) return nullptr;
|
||||||
|
if (!read_from_fd(fd, mmap_start, len)) return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the file type.
|
||||||
|
auto mtype = infer_file_type(mmap_start, len);
|
||||||
|
if (!mtype) return nullptr;
|
||||||
|
|
||||||
|
return std::unique_ptr<history_file_contents_t>(
|
||||||
|
new history_file_contents_t(static_cast<const char *>(mmap_start), len, *mtype));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decode the history item that starts at the given offset, using the decoder matching this
// file's type. The decoder is handed everything from offset to the end of the contents.
history_item_t history_file_contents_t::decode_item(size_t offset) const {
    const char *const item_start = address_at(offset);
    const size_t bytes_left = this->length() - offset;

    const history_file_type_t file_type = this->type();
    if (file_type == history_type_fish_2_0) {
        return decode_item_fish_2_0(item_start, bytes_left);
    }
    if (file_type == history_type_fish_1_x) {
        return decode_item_fish_1_x(item_start, bytes_left);
    }
    // Unrecognized type value: produce an empty item.
    return history_item_t(L"");
}
|
||||||
|
|
||||||
|
// Find the next item at or after *cursor using the scanner matching this file's type, advancing
// *cursor. The cutoff is only passed on to the fish 2.0 scanner.
// Returns the item's offset, or none() when the scanner reports (size_t)-1.
maybe_t<size_t> history_file_contents_t::offset_of_next_item(size_t *cursor, time_t cutoff) {
    const size_t not_found = size_t(-1);
    size_t found = not_found;

    const history_file_type_t file_type = this->type();
    if (file_type == history_type_fish_2_0) {
        found = offset_of_next_item_fish_2_0(*this, cursor, cutoff);
    } else if (file_type == history_type_fish_1_x) {
        found = offset_of_next_item_fish_1_x(this->begin(), this->length(), cursor);
    }

    if (found == not_found) {
        return none();
    }
    return found;
}
|
||||||
|
|
||||||
|
/// Read the line starting at base + cursor into result, without its newline, and return how far
/// the cursor should advance (line length plus one for the newline). If no newline remains
/// before len, result is cleared and the count of remaining bytes is returned. The input is NOT
/// null terminated; it's just a memory mapped file.
static size_t read_line(const char *base, size_t cursor, size_t len, std::string &result) {
    assert(cursor <= len);
    const char *const line_begin = base + cursor;
    const size_t available = len - cursor;

    const void *const hit = std::memchr(line_begin, '\n', available);
    if (hit == nullptr) {
        // We ran off the end without finding a newline.
        result.clear();
        return available;
    }

    const size_t line_len = static_cast<size_t>(static_cast<const char *>(hit) - line_begin);
    result.assign(line_begin, line_len);
    // +1 skips over the newline itself.
    return line_len + 1;
}
|
||||||
|
|
||||||
|
/// Strip leading ' ' characters from str in place and return how many were removed.
static size_t trim_leading_spaces(std::string &str) {
    size_t count = str.find_first_not_of(' ');
    // Empty or all-spaces: everything goes.
    if (count == std::string::npos) count = str.size();
    str.erase(0, count);
    return count;
}
|
||||||
|
|
||||||
|
static bool extract_prefix_and_unescape_yaml(std::string *key, std::string *value,
|
||||||
|
const std::string &line) {
|
||||||
|
size_t where = line.find(':');
|
||||||
|
if (where != std::string::npos) {
|
||||||
|
key->assign(line, 0, where);
|
||||||
|
|
||||||
|
// Skip a space after the : if necessary.
|
||||||
|
size_t val_start = where + 1;
|
||||||
|
if (val_start < line.size() && line.at(val_start) == ' ') val_start++;
|
||||||
|
value->assign(line, val_start, line.size() - val_start);
|
||||||
|
|
||||||
|
unescape_yaml_fish_2_0(key);
|
||||||
|
unescape_yaml_fish_2_0(value);
|
||||||
|
}
|
||||||
|
return where != std::string::npos;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode an item via the fish 2.0 format.
|
||||||
|
static history_item_t decode_item_fish_2_0(const char *base, size_t len) {
|
||||||
|
wcstring cmd;
|
||||||
|
time_t when = 0;
|
||||||
|
path_list_t paths;
|
||||||
|
|
||||||
|
size_t indent = 0, cursor = 0;
|
||||||
|
std::string key, value, line;
|
||||||
|
|
||||||
|
// Read the "- cmd:" line.
|
||||||
|
size_t advance = read_line(base, cursor, len, line);
|
||||||
|
trim_leading_spaces(line);
|
||||||
|
if (!extract_prefix_and_unescape_yaml(&key, &value, line) || key != "- cmd") {
|
||||||
|
goto done; //!OCLINT(goto is the cleanest way to handle bad input)
|
||||||
|
}
|
||||||
|
|
||||||
|
cursor += advance;
|
||||||
|
cmd = str2wcstring(value);
|
||||||
|
|
||||||
|
// Read the remaining lines.
|
||||||
|
for (;;) {
|
||||||
|
size_t advance = read_line(base, cursor, len, line);
|
||||||
|
|
||||||
|
size_t this_indent = trim_leading_spaces(line);
|
||||||
|
if (indent == 0) indent = this_indent;
|
||||||
|
|
||||||
|
if (this_indent == 0 || indent != this_indent) break;
|
||||||
|
|
||||||
|
if (!extract_prefix_and_unescape_yaml(&key, &value, line)) break;
|
||||||
|
|
||||||
|
// We are definitely going to consume this line.
|
||||||
|
cursor += advance;
|
||||||
|
|
||||||
|
if (key == "when") {
|
||||||
|
// Parse an int from the timestamp. Should this fail, strtol returns 0; that's
|
||||||
|
// acceptable.
|
||||||
|
char *end = NULL;
|
||||||
|
long tmp = strtol(value.c_str(), &end, 0);
|
||||||
|
when = tmp;
|
||||||
|
} else if (key == "paths") {
|
||||||
|
// Read lines starting with " - " until we can't read any more.
|
||||||
|
for (;;) {
|
||||||
|
size_t advance = read_line(base, cursor, len, line);
|
||||||
|
if (trim_leading_spaces(line) <= indent) break;
|
||||||
|
|
||||||
|
if (std::strncmp(line.c_str(), "- ", 2)) break;
|
||||||
|
|
||||||
|
// We're going to consume this line.
|
||||||
|
cursor += advance;
|
||||||
|
|
||||||
|
// Skip the leading dash-space and then store this path it.
|
||||||
|
line.erase(0, 2);
|
||||||
|
unescape_yaml_fish_2_0(&line);
|
||||||
|
paths.push_back(str2wcstring(line));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
done:
|
||||||
|
history_item_t result(cmd, when);
|
||||||
|
result.set_required_paths(paths);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a timestamp line that looks like this: spaces, "when:", spaces, timestamp, newline.
/// The string is NOT null terminated; however we do know it contains a newline, so every scan
/// below stops at or before it.
/// \param str start of the line.
/// \param out_when receives the timestamp; written only on success.
/// \return true if the line is a "when:" line carrying a timestamp greater than zero.
static bool parse_timestamp(const char *str, time_t *out_when) {
    const char *cursor = str;
    // Advance past spaces.
    while (*cursor == ' ') cursor++;

    // Look for "when:". strncmp stops at the first mismatch, so it cannot run past the newline.
    const size_t when_len = 5;
    if (std::strncmp(cursor, "when:", when_len) != 0) return false;
    cursor += when_len;

    // Advance past spaces.
    while (*cursor == ' ') cursor++;

    // Try to parse a timestamp. The cast matters: isdigit() is undefined for negative values,
    // which a plain char holds for any high-bit byte in the file.
    long timestamp = 0;
    if (isdigit(static_cast<unsigned char>(*cursor)) &&
        (timestamp = strtol(cursor, nullptr, 0)) > 0) {
        *out_when = static_cast<time_t>(timestamp);
        return true;
    }
    return false;
}
|
||||||
|
|
||||||
|
/// Find the line following the one that begins at start, looking at no more than length bytes.
/// Returns a pointer to that next line, or NULL if there is no newline in range, nothing follows
/// the newline, or the next line is not itself terminated by a newline within range.
/// Note that the string is not null terminated.
static const char *next_line(const char *start, size_t length) {
    // Handle the hopeless case.
    if (length == 0) return nullptr;

    // One past the last byte we may examine.
    const char *const limit = start + length;

    // Locate the newline that ends the current line.
    const char *const newline = static_cast<const char *>(std::memchr(start, '\n', length));
    if (newline == nullptr) return nullptr;

    // The candidate next line starts right after it, if anything is left.
    const char *const candidate = newline + 1;
    if (candidate >= limit) return nullptr;

    // Only hand back the candidate if it is itself "newline terminated".
    const bool terminated = std::memchr(candidate, '\n', limit - candidate) != nullptr;
    return terminated ? candidate : nullptr;
}
|
||||||
|
|
||||||
|
/// Support for iteratively locating the offsets of history items.
|
||||||
|
/// Pass the file contents and a pointer to a cursor size_t, initially 0.
|
||||||
|
/// If custoff_timestamp is nonzero, skip items created at or after that timestamp.
|
||||||
|
/// Returns (size_t)-1 when done.
|
||||||
|
static size_t offset_of_next_item_fish_2_0(const history_file_contents_t &contents,
|
||||||
|
size_t *inout_cursor, time_t cutoff_timestamp) {
|
||||||
|
size_t cursor = *inout_cursor;
|
||||||
|
size_t result = size_t(-1);
|
||||||
|
const size_t length = contents.length();
|
||||||
|
const char *const begin = contents.begin();
|
||||||
|
const char *const end = contents.end();
|
||||||
|
while (cursor < length) {
|
||||||
|
const char *line_start = contents.address_at(cursor);
|
||||||
|
|
||||||
|
// Advance the cursor to the next line.
|
||||||
|
const char *a_newline = (const char *)std::memchr(line_start, '\n', length - cursor);
|
||||||
|
if (a_newline == NULL) break;
|
||||||
|
|
||||||
|
// Advance the cursor past this line. +1 is for the newline.
|
||||||
|
cursor = a_newline - begin + 1;
|
||||||
|
|
||||||
|
// Skip lines with a leading space, since these are in the interior of one of our items.
|
||||||
|
if (line_start[0] == ' ') continue;
|
||||||
|
|
||||||
|
// Skip very short lines to make one of the checks below easier.
|
||||||
|
if (a_newline - line_start < 3) continue;
|
||||||
|
|
||||||
|
// Try to be a little YAML compatible. Skip lines with leading %, ---, or ...
|
||||||
|
if (!std::memcmp(line_start, "%", 1) || !std::memcmp(line_start, "---", 3) ||
|
||||||
|
!std::memcmp(line_start, "...", 3))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Hackish: fish 1.x rewriting a fish 2.0 history file can produce lines with lots of
|
||||||
|
// leading "- cmd: - cmd: - cmd:". Trim all but one leading "- cmd:".
|
||||||
|
const char *double_cmd = "- cmd: - cmd: ";
|
||||||
|
const size_t double_cmd_len = std::strlen(double_cmd);
|
||||||
|
while ((size_t)(a_newline - line_start) > double_cmd_len &&
|
||||||
|
!std::memcmp(line_start, double_cmd, double_cmd_len)) {
|
||||||
|
// Skip over just one of the - cmd. In the end there will be just one left.
|
||||||
|
line_start += std::strlen("- cmd: ");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Hackish: fish 1.x rewriting a fish 2.0 history file can produce commands like "when:
|
||||||
|
// 123456". Ignore those.
|
||||||
|
const char *cmd_when = "- cmd: when:";
|
||||||
|
const size_t cmd_when_len = std::strlen(cmd_when);
|
||||||
|
if ((size_t)(a_newline - line_start) >= cmd_when_len &&
|
||||||
|
!std::memcmp(line_start, cmd_when, cmd_when_len)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// At this point, we know line_start is at the beginning of an item. But maybe we want to
|
||||||
|
// skip this item because of timestamps. A 0 cutoff means we don't care; if we do care, then
|
||||||
|
// try parsing out a timestamp.
|
||||||
|
if (cutoff_timestamp != 0) {
|
||||||
|
// Hackish fast way to skip items created after our timestamp. This is the mechanism by
|
||||||
|
// which we avoid "seeing" commands from other sessions that started after we started.
|
||||||
|
// We try hard to ensure that our items are sorted by their timestamps, so in theory we
|
||||||
|
// could just break, but I don't think that works well if (for example) the clock
|
||||||
|
// changes. So we'll read all subsequent items.
|
||||||
|
// Walk over lines that we think are interior. These lines are not null terminated, but
|
||||||
|
// are guaranteed to contain a newline.
|
||||||
|
bool has_timestamp = false;
|
||||||
|
time_t timestamp = 0;
|
||||||
|
const char *interior_line;
|
||||||
|
|
||||||
|
for (interior_line = next_line(line_start, end - line_start);
|
||||||
|
interior_line != NULL && !has_timestamp;
|
||||||
|
interior_line = next_line(interior_line, end - interior_line)) {
|
||||||
|
// If the first character is not a space, it's not an interior line, so we're done.
|
||||||
|
if (interior_line[0] != ' ') break;
|
||||||
|
|
||||||
|
// Hackish optimization: since we just stepped over some interior line, update the
|
||||||
|
// cursor so we don't have to look at these lines next time.
|
||||||
|
cursor = interior_line - begin;
|
||||||
|
|
||||||
|
// Try parsing a timestamp from this line. If we succeed, the loop will break.
|
||||||
|
has_timestamp = parse_timestamp(interior_line, ×tamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip this item if the timestamp is past our cutoff.
|
||||||
|
if (has_timestamp && timestamp > cutoff_timestamp) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We made it through the gauntlet.
|
||||||
|
result = line_start - begin;
|
||||||
|
break; //!OCLINT(avoid branching statement as last in loop)
|
||||||
|
}
|
||||||
|
|
||||||
|
*inout_cursor = cursor;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove backslashes from all newlines. This makes a string from the history file better formated
|
||||||
|
/// for on screen display.
|
||||||
|
static wcstring history_unescape_newlines_fish_1_x(const wcstring &in_str) {
|
||||||
|
wcstring out;
|
||||||
|
for (const wchar_t *in = in_str.c_str(); *in; in++) {
|
||||||
|
if (*in == L'\\') {
|
||||||
|
if (*(in + 1) != L'\n') {
|
||||||
|
out.push_back(*in);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
out.push_back(*in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode an item via the fish 1.x format. Adapted from fish 1.x's item_get().
|
||||||
|
static history_item_t decode_item_fish_1_x(const char *begin, size_t length) {
|
||||||
|
const char *end = begin + length;
|
||||||
|
const char *pos = begin;
|
||||||
|
wcstring out;
|
||||||
|
bool was_backslash = false;
|
||||||
|
bool first_char = true;
|
||||||
|
bool timestamp_mode = false;
|
||||||
|
time_t timestamp = 0;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
wchar_t c;
|
||||||
|
size_t res;
|
||||||
|
mbstate_t state = {};
|
||||||
|
|
||||||
|
if (MB_CUR_MAX == 1) { // single-byte locale
|
||||||
|
c = (unsigned char)*pos;
|
||||||
|
res = 1;
|
||||||
|
} else {
|
||||||
|
res = std::mbrtowc(&c, pos, end - pos, &state);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (res == (size_t)-1) {
|
||||||
|
pos++;
|
||||||
|
continue;
|
||||||
|
} else if (res == (size_t)-2) {
|
||||||
|
break;
|
||||||
|
} else if (res == (size_t)0) {
|
||||||
|
pos++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
pos += res;
|
||||||
|
|
||||||
|
if (c == L'\n') {
|
||||||
|
if (timestamp_mode) {
|
||||||
|
const wchar_t *time_string = out.c_str();
|
||||||
|
while (*time_string && !iswdigit(*time_string)) time_string++;
|
||||||
|
|
||||||
|
if (*time_string) {
|
||||||
|
time_t tm = (time_t)fish_wcstol(time_string);
|
||||||
|
if (!errno && tm >= 0) {
|
||||||
|
timestamp = tm;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
out.clear();
|
||||||
|
timestamp_mode = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!was_backslash) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (first_char) {
|
||||||
|
first_char = false;
|
||||||
|
if (c == L'#') timestamp_mode = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
out.push_back(c);
|
||||||
|
was_backslash = (c == L'\\') && !was_backslash;
|
||||||
|
}
|
||||||
|
|
||||||
|
out = history_unescape_newlines_fish_1_x(out);
|
||||||
|
return history_item_t(out, timestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Same as offset_of_next_item_fish_2_0, but for fish 1.x (pre fishfish).
/// Adapted from history_populate_from_mmap in history.c
/// \param begin start of the file contents.
/// \param mmap_length number of bytes at begin.
/// \param inout_cursor offset to scan from; updated to just past the item found, and never set
/// beyond mmap_length.
/// \return the offset the item starts at (the incoming cursor), or (size_t)-1 when the cursor
/// is already at or past the end.
static size_t offset_of_next_item_fish_1_x(const char *begin, size_t mmap_length,
                                           size_t *inout_cursor) {
    if (mmap_length == 0 || *inout_cursor >= mmap_length) return (size_t)-1;

    const char *end = begin + mmap_length;
    const char *pos;
    bool ignore_newline = false;
    bool do_push = true;
    bool all_done = false;
    size_t result = *inout_cursor;

    for (pos = begin + *inout_cursor; pos < end && !all_done; pos++) {
        if (do_push) {
            // A leading '#' marks a timestamp line, whose newline does not end the item.
            ignore_newline = (*pos == '#');
            do_push = false;
        }

        if (*pos == '\\') {
            // Skip the escaped character, but only if there is one: a backslash as the very
            // last byte must not push pos beyond one-past-the-end.
            if (pos + 1 < end) pos++;
        } else if (*pos == '\n') {
            if (!ignore_newline) {
                // pos will be left pointing just after this newline, because of the ++ in the loop.
                all_done = true;
            }
            ignore_newline = false;
        }
    }

    *inout_cursor = (pos - begin);
    return result;
}
|
72
src/history_file.h
Normal file
72
src/history_file.h
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
#ifndef FISH_HISTORY_FILE_H
|
||||||
|
#define FISH_HISTORY_FILE_H
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
|
||||||
|
#include "maybe.h"
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include <sys/mman.h>
|
||||||
|
|
||||||
|
// History file types.
|
||||||
|
enum history_file_type_t { history_type_fish_2_0, history_type_fish_1_x };
|
||||||
|
|
||||||
|
/// history_file_contents_t holds the read-only contents of a file.
/// Instances are obtained only through create(); the object cannot be moved, and it releases
/// its memory region with munmap() when destroyed.
class history_file_contents_t {
   public:
    /// Construct a history file contents from a file descriptor. The file descriptor is not closed.
    /// \return nullptr on failure (for example an empty or unseekable file).
    static std::unique_ptr<history_file_contents_t> create(int fd);

    /// Decode an item at a given offset.
    /// The decoder used depends on type().
    history_item_t decode_item(size_t offset) const;

    /// Support for iterating item offsets.
    /// The cursor should initially be 0.
    /// If cutoff is nonzero, skip items whose timestamp is newer than cutoff.
    /// \return the offset of the next item, or none() on end.
    maybe_t<size_t> offset_of_next_item(size_t *cursor, time_t cutoff);

    /// Get the file type.
    history_file_type_t type() const { return type_; }

    /// Get the size of the contents.
    size_t length() const { return length_; }

    /// Return a pointer to the beginning.
    const char *begin() const { return address_at(0); }

    /// Return a pointer to one-past-the-end.
    const char *end() const { return address_at(length_); }

    /// Access the address at a given offset.
    /// The offset may equal length(), yielding the one-past-the-end pointer; anything larger
    /// trips the assertion.
    const char *address_at(size_t offset) const {
        assert(offset <= length_ && "Invalid offset");
        return start_ + offset;
    }

    /// Unmaps the contents.
    ~history_file_contents_t();

   private:
    // The memory mapped pointer.
    const char *start_;

    // The mapped length.
    const size_t length_;

    // The type of the mapped file.
    const history_file_type_t type_;

    // Private constructor; use the static create() function.
    history_file_contents_t(const char *mmap_start, size_t mmap_length, history_file_type_t type);

    // Not movable: the destructor unmaps start_, so there must be exactly one owner.
    history_file_contents_t(history_file_contents_t &&) = delete;
    void operator=(history_file_contents_t &&) = delete;
};
|
||||||
|
|
||||||
|
// Support for escaping and unescaping the nonstandard "yaml" format introduced in fish 2.0.
|
||||||
|
void escape_yaml_fish_2_0(std::string *str);
|
||||||
|
void unescape_yaml_fish_2_0(std::string *str);
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in a new issue