mirror of
https://github.com/fish-shell/fish-shell
synced 2024-12-26 04:43:10 +00:00
Removed XXHash and converted some wchar_t* to wcstring
This commit is contained in:
parent
d54fbddb11
commit
a77cd98136
16 changed files with 42 additions and 388 deletions
|
@ -55,7 +55,7 @@ class autoload_t : public lru_cache_t<autoload_t, autoload_function_t> {
|
|||
wcstring_list_t last_path_tokenized;
|
||||
/// A table containing all the files that are currently being loaded.
|
||||
/// This is here to help prevent recursion.
|
||||
std::unordered_set<wcstring, wcstring_hash> is_loading_set;
|
||||
std::unordered_set<wcstring> is_loading_set;
|
||||
// Function invoked when a command is removed
|
||||
typedef void (*command_removed_function_t)(const wcstring &);
|
||||
const command_removed_function_t command_removed;
|
||||
|
|
|
@ -66,7 +66,7 @@ class argparse_cmd_opts_t {
|
|||
wcstring_list_t raw_exclusive_flags;
|
||||
wcstring_list_t argv;
|
||||
std::unordered_map<wchar_t, option_spec_t *> options;
|
||||
std::unordered_map<wcstring, wchar_t, wcstring_hash> long_to_short_flag;
|
||||
std::unordered_map<wcstring, wchar_t> long_to_short_flag;
|
||||
std::vector<std::vector<wchar_t>> exclusive_flag_sets;
|
||||
|
||||
~argparse_cmd_opts_t() {
|
||||
|
|
19
src/common.h
19
src/common.h
|
@ -831,27 +831,18 @@ enum {
|
|||
#define ignore_result(x) ((void)(x))
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
// Custom hash function used by unordered_map/unordered_set when key is const
|
||||
#ifndef CONST_WCSTRING_HASH
|
||||
#define CONST_WCSTRING_HASH 1
|
||||
#include "xxhash32.h"
|
||||
#include "xxhash64.h"
|
||||
inline size_t xxhash(const void *t, size_t size) {
|
||||
#if __SIZEOF_POINTER__ == __SIZEOF_INT__
|
||||
return XXHash32::hash(t, size, 0);
|
||||
#else
|
||||
return XXHash64::hash(t, size, 0);
|
||||
}
|
||||
struct wcstring_hash {
|
||||
size_t operator()(const wcstring &w) const { return xxhash(w.c_str(), w.size()); }
|
||||
};
|
||||
namespace std {
|
||||
template <>
|
||||
struct hash<const wcstring> {
|
||||
std::size_t operator()(const wcstring &w) const { return xxhash(w.c_str(), w.size()); }
|
||||
std::size_t operator()(const wcstring &w) const {
|
||||
std::hash<wcstring> hasher;
|
||||
return hasher((wcstring)w);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -162,7 +162,7 @@ namespace std {
|
|||
template<>
|
||||
struct hash<completion_entry_t> {
|
||||
size_t operator()(const completion_entry_t &c) const {
|
||||
wcstring_hash hasher;
|
||||
std::hash<wcstring> hasher;
|
||||
return hasher((wcstring) c.cmd);
|
||||
}
|
||||
};
|
||||
|
@ -297,7 +297,7 @@ class completer_t {
|
|||
|
||||
/// Table of completions conditions that have already been tested and the corresponding test
|
||||
/// results.
|
||||
typedef std::unordered_map<wcstring, bool, wcstring_hash> condition_cache_t;
|
||||
typedef std::unordered_map<wcstring, bool> condition_cache_t;
|
||||
condition_cache_t condition_cache;
|
||||
|
||||
enum complete_type_t { COMPLETE_DEFAULT, COMPLETE_AUTOSUGGEST };
|
||||
|
@ -600,7 +600,7 @@ void completer_t::complete_cmd_desc(const wcstring &str) {
|
|||
wcstring lookup_cmd(L"__fish_describe_command ");
|
||||
lookup_cmd.append(escape_string(cmd_start, 1));
|
||||
|
||||
std::unordered_map<wcstring, wcstring, wcstring_hash> lookup;
|
||||
std::unordered_map<wcstring, wcstring> lookup;
|
||||
|
||||
// First locate a list of possible descriptions using a single call to apropos or a direct
|
||||
// search if we know the location of the whatis database. This can take some time on slower
|
||||
|
@ -1557,7 +1557,7 @@ wcstring complete_print() {
|
|||
|
||||
/// Completion "wrapper" support. The map goes from wrapping-command to wrapped-command-list.
|
||||
static std::mutex wrapper_lock;
|
||||
typedef std::unordered_map<wcstring, wcstring_list_t, wcstring_hash> wrapper_map_t;
|
||||
typedef std::unordered_map<wcstring, wcstring_list_t> wrapper_map_t;
|
||||
static wrapper_map_t &wrap_map() {
|
||||
ASSERT_IS_LOCKED(wrapper_lock);
|
||||
// A pointer is a little more efficient than an object as a static because we can elide the
|
||||
|
@ -1614,7 +1614,7 @@ wcstring_list_t complete_get_wrap_chain(const wcstring &command) {
|
|||
const wrapper_map_t &wraps = wrap_map();
|
||||
|
||||
wcstring_list_t result;
|
||||
std::unordered_set<wcstring, wcstring_hash> visited; // set of visited commands
|
||||
std::unordered_set<wcstring> visited; // set of visited commands
|
||||
wcstring_list_t to_visit(1, command); // stack of remaining-to-visit commands
|
||||
|
||||
wcstring target;
|
||||
|
|
41
src/env.cpp
41
src/env.cpp
|
@ -323,35 +323,20 @@ static env_universal_t *uvars() { return s_universal_variables; }
|
|||
// Helper class for storing constant strings, without needing to wrap them in a wcstring.
|
||||
|
||||
// Comparer for const string set.
|
||||
struct const_string_set_comparer {
|
||||
bool operator()(const wchar_t *a, const wchar_t *b) { return wcscmp(a, b) < 0; }
|
||||
};
|
||||
namespace std {
|
||||
template<>
|
||||
struct hash<const wchar_t *> {
|
||||
size_t operator()(const wchar_t *p) const { return xxhash(p, wcslen(p)); }
|
||||
};
|
||||
template <>
|
||||
struct equal_to<const wchar_t *> {
|
||||
bool operator()(const wchar_t *a, const wchar_t *b) const {
|
||||
return wcscmp(a, b) == 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
typedef std::unordered_set<const wchar_t *> const_string_set_t;
|
||||
typedef std::unordered_set<wcstring> const_string_set_t;
|
||||
|
||||
/// Table of variables that may not be set using the set command.
|
||||
static const_string_set_t env_read_only;
|
||||
|
||||
static bool is_read_only(const wcstring &key) {
|
||||
return env_read_only.find(key.c_str()) != env_read_only.end();
|
||||
return env_read_only.find(key) != env_read_only.end();
|
||||
}
|
||||
|
||||
/// Table of variables whose value is dynamically calculated, such as umask, status, etc.
|
||||
static const_string_set_t env_electric;
|
||||
|
||||
static bool is_electric(const wcstring &key) {
|
||||
return env_electric.find(key.c_str()) != env_electric.end();
|
||||
return env_electric.find(key) != env_electric.end();
|
||||
}
|
||||
|
||||
const env_var_t env_node_t::find_entry(const wcstring &key) {
|
||||
|
@ -877,18 +862,20 @@ void env_init(const struct config_paths_t *paths /* or NULL */) {
|
|||
setup_var_dispatch_table();
|
||||
|
||||
// These variables can not be altered directly by the user.
|
||||
const wchar_t *const ro_keys[] = {
|
||||
L"status", L"history", L"_", L"PWD", L"FISH_VERSION",
|
||||
for (auto &k : {
|
||||
wcstring(L"status"),
|
||||
wcstring(L"history"),
|
||||
wcstring(L"_"),
|
||||
wcstring(L"PWD"),
|
||||
wcstring(L"FISH_VERSION") }) {
|
||||
env_read_only.emplace(std::move(k));
|
||||
// L"SHLVL" is readonly but will be inserted below after we increment it.
|
||||
};
|
||||
for (size_t i = 0; i < sizeof ro_keys / sizeof *ro_keys; i++) {
|
||||
env_read_only.insert(ro_keys[i]);
|
||||
}
|
||||
|
||||
// Names of all dynamically calculated variables.
|
||||
env_electric.insert(L"history");
|
||||
env_electric.insert(L"status");
|
||||
env_electric.insert(L"umask");
|
||||
env_electric.emplace(L"history");
|
||||
env_electric.emplace(L"status");
|
||||
env_electric.emplace(L"umask");
|
||||
|
||||
// Now the environment variable handling is set up, the next step is to insert valid data.
|
||||
|
||||
|
@ -960,7 +947,7 @@ void env_init(const struct config_paths_t *paths /* or NULL */) {
|
|||
}
|
||||
}
|
||||
env_set_one(L"SHLVL", ENV_GLOBAL | ENV_EXPORT, nshlvl_str);
|
||||
env_read_only.insert(L"SHLVL");
|
||||
env_read_only.emplace(L"SHLVL");
|
||||
|
||||
// Set up the HOME variable.
|
||||
// Unlike $USER, it doesn't seem that `su`s pass this along
|
||||
|
|
|
@ -34,7 +34,7 @@ class env_universal_t {
|
|||
|
||||
// Keys that have been modified, and need to be written. A value here that is not present in
|
||||
// vars indicates a deleted value.
|
||||
std::unordered_set<wcstring, wcstring_hash> modified;
|
||||
std::unordered_set<wcstring> modified;
|
||||
|
||||
// Path that we save to. If empty, use the default.
|
||||
const wcstring explicit_vars_path;
|
||||
|
|
|
@ -67,7 +67,7 @@ static const wchar_t *const highlight_var[] = {L"fish_color_normal",
|
|||
/// Returns:
|
||||
/// false: the filesystem is not case insensitive
|
||||
/// true: the file system is case insensitive
|
||||
typedef std::unordered_map<wcstring, bool, wcstring_hash> case_sensitivity_cache_t;
|
||||
typedef std::unordered_map<wcstring, bool> case_sensitivity_cache_t;
|
||||
bool fs_is_case_insensitive(const wcstring &path, int fd,
|
||||
case_sensitivity_cache_t &case_sensitivity_cache) {
|
||||
bool result = false;
|
||||
|
@ -146,7 +146,7 @@ bool is_potential_path(const wcstring &potential_path_fragment, const wcstring_l
|
|||
|
||||
// Don't test the same path multiple times, which can happen if the path is absolute and the
|
||||
// CDPATH contains multiple entries.
|
||||
std::unordered_set<wcstring, wcstring_hash> checked_paths;
|
||||
std::unordered_set<wcstring> checked_paths;
|
||||
|
||||
// Keep a cache of which paths / filesystems are case sensitive.
|
||||
case_sensitivity_cache_t case_sensitivity_cache;
|
||||
|
|
|
@ -139,7 +139,7 @@ class history_t {
|
|||
uint32_t disable_automatic_save_counter;
|
||||
|
||||
// Deleted item contents.
|
||||
std::unordered_set<wcstring, wcstring_hash> deleted_items;
|
||||
std::unordered_set<wcstring> deleted_items;
|
||||
|
||||
// The mmaped region for the history file.
|
||||
const char *mmap_start;
|
||||
|
|
|
@ -45,7 +45,7 @@ class lru_cache_t {
|
|||
explicit lru_node_t(const CONTENTS &v) : value(std::move(v)) {}
|
||||
};
|
||||
|
||||
typedef typename std::unordered_map<wcstring, lru_node_t, wcstring_hash>::iterator node_iter_t;
|
||||
typedef typename std::unordered_map<wcstring, lru_node_t>::iterator node_iter_t;
|
||||
|
||||
// Max node count. This may be (transiently) exceeded by add_node_without_eviction, which is
|
||||
// used from background threads.
|
||||
|
@ -54,7 +54,7 @@ class lru_cache_t {
|
|||
// All of our nodes
|
||||
// Note that our linked list contains pointers to these nodes in the map
|
||||
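// We are dependent on the pointer/reference-stability guarantees of std::unordered_map
// (a rehash invalidates iterators, but never pointers or references to the stored nodes)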
|
||||
std::unordered_map<wcstring, lru_node_t, wcstring_hash> node_map;
|
||||
std::unordered_map<wcstring, lru_node_t> node_map;
|
||||
|
||||
// Head of the linked list
|
||||
// The list is circular!
|
||||
|
|
|
@ -267,7 +267,7 @@ static void mangle_1_completion_description(wcstring *str) {
|
|||
static void join_completions(comp_info_list_t *comps) {
|
||||
// A map from description to index in the completion list of the element with that description.
|
||||
// The indexes are stored +1.
|
||||
std::unordered_map<wcstring, size_t, wcstring_hash> desc_table;
|
||||
std::unordered_map<wcstring, size_t> desc_table;
|
||||
|
||||
// Note that we mutate the completion list as we go, so the size changes.
|
||||
for (size_t i = 0; i < comps->size(); i++) {
|
||||
|
|
|
@ -203,7 +203,7 @@ size_t escape_code_length(const wchar_t *code);
|
|||
class cached_esc_sequences_t {
|
||||
private:
|
||||
// Cached escape sequences we've already detected in the prompt and similar strings.
|
||||
std::unordered_set<wcstring, wcstring_hash> cache;
|
||||
std::unordered_set<wcstring> cache;
|
||||
// The escape sequence lengths we've cached. My original implementation used min and max
|
||||
// length variables. The cache was then iterated over using a loop like this:
|
||||
// `for (size_t l = min; l <= max; l++)`.
|
||||
|
|
|
@ -439,7 +439,7 @@ class wildcard_expander_t {
|
|||
// The working directory to resolve paths against
|
||||
const wcstring working_directory;
|
||||
// The set of items we have resolved, used to efficiently avoid duplication.
|
||||
std::unordered_set<wcstring, wcstring_hash> completion_set;
|
||||
std::unordered_set<wcstring> completion_set;
|
||||
// The set of file IDs we have visited, used to avoid symlink loops.
|
||||
std::unordered_set<file_id_t> visited_files;
|
||||
// Flags controlling expansion.
|
||||
|
|
|
@ -38,7 +38,7 @@ const file_id_t kInvalidFileID = {(dev_t)-1LL, (ino_t)-1LL, (uint64_t)-1LL, -1,
|
|||
#endif
|
||||
|
||||
/// Map used as cache by wgettext.
|
||||
static owning_lock<std::unordered_map<wcstring, wcstring, wcstring_hash>> wgettext_map;
|
||||
static owning_lock<std::unordered_map<wcstring, wcstring>> wgettext_map;
|
||||
|
||||
bool wreaddir_resolving(DIR *dir, const wcstring &dir_path, wcstring &out_name, bool *out_is_dir) {
|
||||
struct dirent d;
|
||||
|
|
|
@ -148,7 +148,12 @@ struct file_id_t {
|
|||
namespace std {
|
||||
template<>
|
||||
struct hash<file_id_t> {
|
||||
size_t operator()(const file_id_t &f) const { return xxhash(&f, sizeof(f)); }
|
||||
size_t operator()(const file_id_t &f) const {
|
||||
std::hash<decltype(f.device)> hasher1;
|
||||
std::hash<decltype(f.inode)> hasher2;
|
||||
|
||||
return hasher1(f.device) ^ hasher2(f.inode);
|
||||
}
|
||||
};
|
||||
}
|
||||
#endif
|
||||
|
|
155
src/xxhash32.h
155
src/xxhash32.h
|
@ -1,155 +0,0 @@
|
|||
// //////////////////////////////////////////////////////////
|
||||
// xxhash32.h
|
||||
// Copyright (c) 2016 Stephan Brumme. All rights reserved.
|
||||
// see http://create.stephan-brumme.com/disclaimer.html
|
||||
//
|
||||
#pragma once
|
||||
#include <stdint.h> // for uint32_t and uint64_t
|
||||
/// XXHash (32 bit), based on Yann Collet's descriptions, see http://cyan4973.github.io/xxHash/
/** How to use:
    uint32_t myseed = 0;
    XXHash32 myhash(myseed);
    myhash.add(pointerToSomeBytes,     numberOfBytes);
    myhash.add(pointerToSomeMoreBytes, numberOfMoreBytes); // call add() as often as you like to ...
    // and compute hash:
    uint32_t result = myhash.hash();

    // or all of the above in one single line:
    uint32_t result2 = XXHash32::hash(mypointer, numBytes, myseed);

    Note: input words are assembled byte by byte in little-endian order, so the
    result depends neither on the host's endianness nor on the alignment of the input.
**/
class XXHash32
{
public:
  /// create new XXHash (32 bit)
  /** @param seed your seed value, even zero is a valid seed and e.g. used by LZ4 **/
  explicit XXHash32(uint32_t seed)
  {
    state[0] = seed + Prime1 + Prime2;
    state[1] = seed + Prime2;
    state[2] = seed;
    state[3] = seed - Prime1;
    bufferSize  = 0;
    totalLength = 0;
  }

  /// add a chunk of bytes
  /** @param  input  pointer to a continuous block of data (any alignment)
      @param  length number of bytes
      @return false if parameters are invalid / zero **/
  bool add(const void* input, uint64_t length)
  {
    // no data ?
    if (!input || length == 0)
      return false;

    totalLength += length;
    // byte-wise access
    const unsigned char* data = (const unsigned char*)input;

    // unprocessed old data plus new data still fit in temporary buffer ?
    if (bufferSize + length < MaxBufferSize)
    {
      // just add new data
      while (length-- > 0)
        buffer[bufferSize++] = *data++;
      return true;
    }

    // some data left from previous update ?
    if (bufferSize > 0)
    {
      // top up the temporary buffer to a full block (16 bytes); the check above
      // guarantees the input holds at least the bytes needed for that
      while (bufferSize < MaxBufferSize)
      {
        buffer[bufferSize++] = *data++;
        length--;
      }

      // process these 16 bytes (4x4)
      process(buffer, state[0], state[1], state[2], state[3]);
    }

    // copying state to local variables helps optimizer A LOT
    uint32_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3];
    // 16 bytes at once; counting the remaining length (rather than comparing against
    // "end - 16") never forms a pointer that lies before the start of the input
    while (length >= MaxBufferSize)
    {
      // local variables s0..s3 instead of state[0]..state[3] are much faster
      process(data, s0, s1, s2, s3);
      data   += MaxBufferSize;
      length -= MaxBufferSize;
    }
    // copy back
    state[0] = s0; state[1] = s1; state[2] = s2; state[3] = s3;

    // copy remainder (0..15 bytes) to temporary buffer
    bufferSize = (unsigned int)length;
    for (unsigned int i = 0; i < bufferSize; i++)
      buffer[i] = data[i];

    // done
    return true;
  }

  /// get current hash
  /** @return 32 bit XXHash **/
  uint32_t hash() const
  {
    uint32_t result = (uint32_t)totalLength;

    // fold 128 bit state into one single 32 bit value
    if (totalLength >= MaxBufferSize)
      result += rotateLeft(state[0],  1) +
                rotateLeft(state[1],  7) +
                rotateLeft(state[2], 12) +
                rotateLeft(state[3], 18);
    else
      // internal state wasn't set in add(), therefore original seed is still stored in state2
      result += state[2] + Prime5;

    // process remaining bytes in temporary buffer
    const unsigned char* data = buffer;
    // point beyond last byte
    const unsigned char* stop = data + bufferSize;

    // at least 4 bytes left ? => eat 4 bytes per step
    for (; data + 4 <= stop; data += 4)
      result = rotateLeft(result + read32(data) * Prime3, 17) * Prime4;

    // take care of remaining 0..3 bytes, eat 1 byte per step
    while (data != stop)
      result = rotateLeft(result + (*data++) * Prime5, 11) * Prime1;

    // mix bits
    result ^= result >> 15;
    result *= Prime2;
    result ^= result >> 13;
    result *= Prime3;
    result ^= result >> 16;
    return result;
  }

  /// combine constructor, add() and hash() in one static function (C style)
  /** @param  input  pointer to a continuous block of data
      @param  length number of bytes
      @param  seed your seed value, e.g. zero is a valid seed and used by LZ4
      @return 32 bit XXHash **/
  static uint32_t hash(const void* input, uint64_t length, uint32_t seed)
  {
    XXHash32 hasher(seed);
    hasher.add(input, length);
    return hasher.hash();
  }

private:
  /// magic constants :-)
  static const uint32_t Prime1 = 2654435761U;
  static const uint32_t Prime2 = 2246822519U;
  static const uint32_t Prime3 = 3266489917U;
  static const uint32_t Prime4 =  668265263U;
  static const uint32_t Prime5 =  374761393U;

  /// temporarily store up to 15 bytes between multiple add() calls
  static const uint32_t MaxBufferSize = 15+1;

  // internal state and temporary buffer
  uint32_t      state[4]; // state[2] == seed if totalLength < MaxBufferSize
  unsigned char buffer[MaxBufferSize];
  unsigned int  bufferSize;
  uint64_t      totalLength;

  /// rotate bits, should compile to a single CPU instruction (ROL)
  /** @param bits rotation distance, must be in 1..31 (every call site passes a constant in that range) **/
  static inline uint32_t rotateLeft(uint32_t x, unsigned char bits)
  {
    return (x << bits) | (x >> (32 - bits));
  }

  /// load a 32 bit little-endian word from an arbitrarily aligned address
  /** Replaces casting the byte pointer to uint32_t*: such a cast may perform a misaligned
      load, breaks the aliasing rules and yields host-endian (non-portable) hash values. **/
  static inline uint32_t read32(const unsigned char* p)
  {
    return  (uint32_t)p[0]        |
           ((uint32_t)p[1] <<  8) |
           ((uint32_t)p[2] << 16) |
           ((uint32_t)p[3] << 24);
  }

  /// process a block of 4x4 bytes, this is the main part of the XXHash32 algorithm
  static inline void process(const void* data, uint32_t& state0, uint32_t& state1, uint32_t& state2, uint32_t& state3)
  {
    const unsigned char* block = (const unsigned char*) data;
    state0 = rotateLeft(state0 + read32(block     ) * Prime2, 13) * Prime1;
    state1 = rotateLeft(state1 + read32(block +  4) * Prime2, 13) * Prime1;
    state2 = rotateLeft(state2 + read32(block +  8) * Prime2, 13) * Prime1;
    state3 = rotateLeft(state3 + read32(block + 12) * Prime2, 13) * Prime1;
  }
};
|
174
src/xxhash64.h
174
src/xxhash64.h
|
@ -1,174 +0,0 @@
|
|||
// //////////////////////////////////////////////////////////
|
||||
// xxhash64.h
|
||||
// Copyright (c) 2016 Stephan Brumme. All rights reserved.
|
||||
// see http://create.stephan-brumme.com/disclaimer.html
|
||||
//
|
||||
#pragma once
|
||||
#include <stdint.h> // for uint32_t and uint64_t
|
||||
/// XXHash (64 bit), based on Yann Collet's descriptions, see http://cyan4973.github.io/xxHash/
/** How to use:
    uint64_t myseed = 0;
    XXHash64 myhash(myseed);
    myhash.add(pointerToSomeBytes,     numberOfBytes);
    myhash.add(pointerToSomeMoreBytes, numberOfMoreBytes); // call add() as often as you like to ...
    // and compute hash:
    uint64_t result = myhash.hash();

    // or all of the above in one single line:
    uint64_t result2 = XXHash64::hash(mypointer, numBytes, myseed);

    Note: input words are assembled byte by byte in little-endian order, so the
    result depends neither on the host's endianness nor on the alignment of the input.
**/
class XXHash64
{
public:
  /// create new XXHash (64 bit)
  /** @param seed your seed value, even zero is a valid seed **/
  explicit XXHash64(uint64_t seed)
  {
    state[0] = seed + Prime1 + Prime2;
    state[1] = seed + Prime2;
    state[2] = seed;
    state[3] = seed - Prime1;
    bufferSize  = 0;
    totalLength = 0;
  }

  /// add a chunk of bytes
  /** @param  input  pointer to a continuous block of data (any alignment)
      @param  length number of bytes
      @return false if parameters are invalid / zero **/
  bool add(const void* input, uint64_t length)
  {
    // no data ?
    if (!input || length == 0)
      return false;

    totalLength += length;
    // byte-wise access
    const unsigned char* data = (const unsigned char*)input;

    // unprocessed old data plus new data still fit in temporary buffer ?
    if (bufferSize + length < MaxBufferSize)
    {
      // just add new data
      while (length-- > 0)
        buffer[bufferSize++] = *data++;
      return true;
    }

    // some data left from previous update ?
    if (bufferSize > 0)
    {
      // top up the temporary buffer to a full block (32 bytes); the check above
      // guarantees the input holds at least the bytes needed for that
      while (bufferSize < MaxBufferSize)
      {
        buffer[bufferSize++] = *data++;
        length--;
      }

      // process these 32 bytes (4x8)
      process(buffer, state[0], state[1], state[2], state[3]);
    }

    // copying state to local variables helps optimizer A LOT
    uint64_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3];
    // 32 bytes at once; counting the remaining length (rather than comparing against
    // "end - 32") never forms a pointer that lies before the start of the input
    while (length >= MaxBufferSize)
    {
      // local variables s0..s3 instead of state[0]..state[3] are much faster
      process(data, s0, s1, s2, s3);
      data   += MaxBufferSize;
      length -= MaxBufferSize;
    }
    // copy back
    state[0] = s0; state[1] = s1; state[2] = s2; state[3] = s3;

    // copy remainder (0..31 bytes) to temporary buffer
    bufferSize = (unsigned int)length;
    for (unsigned int i = 0; i < bufferSize; i++)
      buffer[i] = data[i];

    // done
    return true;
  }

  /// get current hash
  /** @return 64 bit XXHash **/
  uint64_t hash() const
  {
    // fold 256 bit state into one single 64 bit value
    uint64_t result;
    if (totalLength >= MaxBufferSize)
    {
      result = rotateLeft(state[0],  1) +
               rotateLeft(state[1],  7) +
               rotateLeft(state[2], 12) +
               rotateLeft(state[3], 18);
      result = (result ^ processSingle(0, state[0])) * Prime1 + Prime4;
      result = (result ^ processSingle(0, state[1])) * Prime1 + Prime4;
      result = (result ^ processSingle(0, state[2])) * Prime1 + Prime4;
      result = (result ^ processSingle(0, state[3])) * Prime1 + Prime4;
    }
    else
    {
      // internal state wasn't set in add(), therefore original seed is still stored in state2
      result = state[2] + Prime5;
    }

    result += totalLength;

    // process remaining bytes in temporary buffer
    const unsigned char* data = buffer;
    // point beyond last byte
    const unsigned char* stop = data + bufferSize;

    // at least 8 bytes left ? => eat 8 bytes per step
    for (; data + 8 <= stop; data += 8)
      result = rotateLeft(result ^ processSingle(0, read64(data)), 27) * Prime1 + Prime4;

    // 4 bytes left ? => eat those
    if (data + 4 <= stop)
    {
      result = rotateLeft(result ^ read32(data) * Prime1, 23) * Prime2 + Prime3;
      data  += 4;
    }

    // take care of remaining 0..3 bytes, eat 1 byte per step
    while (data != stop)
      result = rotateLeft(result ^ (*data++) * Prime5, 11) * Prime1;

    // mix bits
    result ^= result >> 33;
    result *= Prime2;
    result ^= result >> 29;
    result *= Prime3;
    result ^= result >> 32;
    return result;
  }

  /// combine constructor, add() and hash() in one static function (C style)
  /** @param  input  pointer to a continuous block of data
      @param  length number of bytes
      @param  seed your seed value, e.g. zero is a valid seed
      @return 64 bit XXHash **/
  static uint64_t hash(const void* input, uint64_t length, uint64_t seed)
  {
    XXHash64 hasher(seed);
    hasher.add(input, length);
    return hasher.hash();
  }

private:
  /// magic constants :-)
  static const uint64_t Prime1 = 11400714785074694791ULL;
  static const uint64_t Prime2 = 14029467366897019727ULL;
  static const uint64_t Prime3 =  1609587929392839161ULL;
  static const uint64_t Prime4 =  9650029242287828579ULL;
  static const uint64_t Prime5 =  2870177450012600261ULL;

  /// temporarily store up to 31 bytes between multiple add() calls
  static const uint64_t MaxBufferSize = 31+1;

  // internal state and temporary buffer
  uint64_t      state[4]; // state[2] == seed if totalLength < MaxBufferSize
  unsigned char buffer[MaxBufferSize];
  unsigned int  bufferSize;
  uint64_t      totalLength;

  /// rotate bits, should compile to a single CPU instruction (ROL)
  /** @param bits rotation distance, must be in 1..63 (every call site passes a constant in that range) **/
  static inline uint64_t rotateLeft(uint64_t x, unsigned char bits)
  {
    return (x << bits) | (x >> (64 - bits));
  }

  /// load a 32 bit little-endian word from an arbitrarily aligned address
  static inline uint32_t read32(const unsigned char* p)
  {
    return  (uint32_t)p[0]        |
           ((uint32_t)p[1] <<  8) |
           ((uint32_t)p[2] << 16) |
           ((uint32_t)p[3] << 24);
  }

  /// load a 64 bit little-endian word from an arbitrarily aligned address
  /** Replaces casting the byte pointer to uint64_t*: such a cast may perform a misaligned
      load, breaks the aliasing rules and yields host-endian (non-portable) hash values. **/
  static inline uint64_t read64(const unsigned char* p)
  {
    return (uint64_t)read32(p) | ((uint64_t)read32(p + 4) << 32);
  }

  /// process a single 64 bit value
  static inline uint64_t processSingle(uint64_t previous, uint64_t input)
  {
    return rotateLeft(previous + input * Prime2, 31) * Prime1;
  }

  /// process a block of 4x8 bytes, this is the main part of the XXHash64 algorithm
  static inline void process(const void* data, uint64_t& state0, uint64_t& state1, uint64_t& state2, uint64_t& state3)
  {
    const unsigned char* block = (const unsigned char*) data;
    state0 = processSingle(state0, read64(block     ));
    state1 = processSingle(state1, read64(block +  8));
    state2 = processSingle(state2, read64(block + 16));
    state3 = processSingle(state3, read64(block + 24));
  }
};
|
Loading…
Reference in a new issue