mirror of
https://github.com/fish-shell/fish-shell
synced 2024-11-10 07:04:29 +00:00
Port util.cpp to Rust
The original implementation without the test took me 3 hours (first time seriously looking into this) The functions take "wcharz_t" for smooth integration with existing C++ callers. This is at the expense of Rust callers, which would prefer "&wstr". Would be nice to declare a function parameter that accepts both but I don't think that really works since "wcharz_t" drops the lifetime annotation.
This commit is contained in:
parent
44d75409d0
commit
517d53dc46
7 changed files with 328 additions and 290 deletions
|
@ -126,8 +126,8 @@ set(FISH_SRCS
|
|||
src/parser.cpp src/parser_keywords.cpp src/path.cpp src/postfork.cpp
|
||||
src/proc.cpp src/re.cpp src/reader.cpp src/redirection.cpp src/screen.cpp
|
||||
src/signals.cpp src/termsize.cpp src/timer.cpp src/tinyexpr.cpp
|
||||
src/tokenizer.cpp src/trace.cpp src/utf8.cpp src/util.cpp
|
||||
src/wait_handle.cpp src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp
|
||||
src/tokenizer.cpp src/trace.cpp src/utf8.cpp
|
||||
src/wait_handle.cpp src/wcstringutil.cpp src/wgetopt.cpp src/wildcard.cpp
|
||||
src/wutil.cpp src/fds.cpp src/rustffi.cpp
|
||||
)
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ fn main() -> miette::Result<()> {
|
|||
"src/ffi_tests.rs",
|
||||
"src/smoke.rs",
|
||||
"src/topic_monitor.rs",
|
||||
"src/util.rs",
|
||||
"src/builtins/shared.rs",
|
||||
];
|
||||
cxx_build::bridges(&source_files)
|
||||
|
|
|
@ -15,6 +15,7 @@ mod flog;
|
|||
mod signal;
|
||||
mod smoke;
|
||||
mod topic_monitor;
|
||||
mod util;
|
||||
mod wchar;
|
||||
mod wchar_ext;
|
||||
mod wchar_ffi;
|
||||
|
|
315
fish-rust/src/util.rs
Normal file
315
fish-rust/src/util.rs
Normal file
|
@ -0,0 +1,315 @@
|
|||
//! Generic utilities library.
|
||||
|
||||
use crate::ffi::wcharz_t;
|
||||
use crate::wchar::wstr;
|
||||
use std::time;
|
||||
|
||||
#[cxx::bridge]
|
||||
mod ffi {
|
||||
extern "C++" {
|
||||
include!("wutil.h");
|
||||
type wcharz_t = super::wcharz_t;
|
||||
}
|
||||
|
||||
extern "Rust" {
|
||||
fn wcsfilecmp(a: wcharz_t, b: wcharz_t) -> i32;
|
||||
fn wcsfilecmp_glob(a: wcharz_t, b: wcharz_t) -> i32;
|
||||
fn get_time() -> i64;
|
||||
}
|
||||
}
|
||||
|
||||
/// Compares two wide character strings with an (arguably) intuitive ordering. This function tries
|
||||
/// to order strings in a way which is intuitive to humans with regards to sorting strings
|
||||
/// containing numbers.
|
||||
///
|
||||
/// Most sorting functions would sort the strings 'file1.txt' 'file5.txt' and 'file12.txt' as:
|
||||
///
|
||||
/// file1.txt
|
||||
/// file12.txt
|
||||
/// file5.txt
|
||||
///
|
||||
/// This function regards any sequence of digits as a single entity when performing comparisons, so
|
||||
/// the output is instead:
|
||||
///
|
||||
/// file1.txt
|
||||
/// file5.txt
|
||||
/// file12.txt
|
||||
///
|
||||
/// Which most people would find more intuitive.
|
||||
///
|
||||
/// This won't return the optimum results for numbers in bases higher than ten, such as hexadecimal,
|
||||
/// but at least a stable sort order will result.
|
||||
///
|
||||
/// This function performs a two-tiered sort, where difference in case and in number of leading
|
||||
/// zeroes in numbers only have effect if no other differences between strings are found. This way,
|
||||
/// a 'file1' and 'File1' will not be considered identical, and hence their internal sort order is
|
||||
/// not arbitrary, but the names 'file1', 'File2' and 'file3' will still be sorted in the order
|
||||
/// given above.
|
||||
pub fn wcsfilecmp(a: wcharz_t, b: wcharz_t) -> i32 {
|
||||
// TODO This should return `std::cmp::Ordering`.
|
||||
let a: &wstr = a.into();
|
||||
let b: &wstr = b.into();
|
||||
let mut retval = 0;
|
||||
let mut ai = 0;
|
||||
let mut bi = 0;
|
||||
while ai < a.len() && bi < b.len() {
|
||||
let ac = a.as_char_slice()[ai];
|
||||
let bc = b.as_char_slice()[bi];
|
||||
if ac.is_ascii_digit() && bc.is_ascii_digit() {
|
||||
let (ad, bd);
|
||||
(retval, ad, bd) = wcsfilecmp_leading_digits(&a[ai..], &b[bi..]);
|
||||
ai += ad;
|
||||
bi += bd;
|
||||
if retval != 0 || ai == a.len() || bi == b.len() {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Fast path: Skip towupper.
|
||||
if ac == bc {
|
||||
ai += 1;
|
||||
bi += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Sort dashes after Z - see #5634
|
||||
let mut acl = if ac == '-' { '[' } else { ac };
|
||||
let mut bcl = if bc == '-' { '[' } else { bc };
|
||||
// TODO Compare the tail (enabled by Rust's Unicode support).
|
||||
acl = acl.to_uppercase().next().unwrap();
|
||||
bcl = bcl.to_uppercase().next().unwrap();
|
||||
|
||||
if acl < bcl {
|
||||
retval = -1;
|
||||
break;
|
||||
} else if acl > bcl {
|
||||
retval = 1;
|
||||
break;
|
||||
} else {
|
||||
ai += 1;
|
||||
bi += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if retval != 0 {
|
||||
return retval; // we already know the strings aren't logically equal
|
||||
}
|
||||
|
||||
if ai == a.len() {
|
||||
if bi == b.len() {
|
||||
// The strings are logically equal. They may or may not be the same length depending on
|
||||
// whether numbers were present but that doesn't matter. Disambiguate strings that
|
||||
// differ by letter case or length. We don't bother optimizing the case where the file
|
||||
// names are literally identical because that won't occur given how this function is
|
||||
// used. And even if it were to occur (due to being reused in some other context) it
|
||||
// would be so rare that it isn't worth optimizing for.
|
||||
match a.cmp(b) {
|
||||
std::cmp::Ordering::Less => -1,
|
||||
std::cmp::Ordering::Equal => 0,
|
||||
std::cmp::Ordering::Greater => 1,
|
||||
}
|
||||
} else {
|
||||
-1 // string a is a prefix of b and b is longer
|
||||
}
|
||||
} else {
|
||||
assert!(bi == b.len());
|
||||
return 1; // string b is a prefix of a and a is longer
|
||||
}
|
||||
}
|
||||
|
||||
/// wcsfilecmp, but frozen in time for glob usage.
|
||||
pub fn wcsfilecmp_glob(a: wcharz_t, b: wcharz_t) -> i32 {
|
||||
// TODO This should return `std::cmp::Ordering`.
|
||||
let a: &wstr = a.into();
|
||||
let b: &wstr = b.into();
|
||||
let mut retval = 0;
|
||||
let mut ai = 0;
|
||||
let mut bi = 0;
|
||||
while ai < a.len() && bi < b.len() {
|
||||
let ac = a.as_char_slice()[ai];
|
||||
let bc = b.as_char_slice()[bi];
|
||||
if ac.is_ascii_digit() && bc.is_ascii_digit() {
|
||||
let (ad, bd);
|
||||
(retval, ad, bd) = wcsfilecmp_leading_digits(&a[ai..], &b[bi..]);
|
||||
ai += ad;
|
||||
bi += bd;
|
||||
// If we know the strings aren't logically equal or we've reached the end of one or both
|
||||
// strings we can stop iterating over the chars in each string.
|
||||
if retval != 0 || ai == a.len() || bi == b.len() {
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Fast path: Skip towlower.
|
||||
if ac == bc {
|
||||
ai += 1;
|
||||
bi += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
// TODO Compare the tail (enabled by Rust's Unicode support).
|
||||
let acl = ac.to_lowercase().next().unwrap();
|
||||
let bcl = bc.to_lowercase().next().unwrap();
|
||||
if acl < bcl {
|
||||
retval = -1;
|
||||
break;
|
||||
} else if acl > bcl {
|
||||
retval = 1;
|
||||
break;
|
||||
} else {
|
||||
ai += 1;
|
||||
bi += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if retval != 0 {
|
||||
return retval; // we already know the strings aren't logically equal
|
||||
}
|
||||
|
||||
if ai == a.len() {
|
||||
if bi == b.len() {
|
||||
// The strings are logically equal. They may or may not be the same length depending on
|
||||
// whether numbers were present but that doesn't matter. Disambiguate strings that
|
||||
// differ by letter case or length. We don't bother optimizing the case where the file
|
||||
// names are literally identical because that won't occur given how this function is
|
||||
// used. And even if it were to occur (due to being reused in some other context) it
|
||||
// would be so rare that it isn't worth optimizing for.
|
||||
match a.cmp(b) {
|
||||
std::cmp::Ordering::Less => -1,
|
||||
std::cmp::Ordering::Equal => 0,
|
||||
std::cmp::Ordering::Greater => 1,
|
||||
}
|
||||
} else {
|
||||
-1 // string a is a prefix of b and b is longer
|
||||
}
|
||||
} else {
|
||||
assert!(bi == b.len());
|
||||
return 1; // string b is a prefix of a and a is longer
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current time in microseconds since Jan 1, 1970.
|
||||
pub fn get_time() -> i64 {
|
||||
match time::SystemTime::now().duration_since(time::UNIX_EPOCH) {
|
||||
Ok(difference) => difference.as_micros() as i64,
|
||||
Err(until_epoch) => -(until_epoch.duration().as_micros() as i64),
|
||||
}
|
||||
}
|
||||
|
||||
// Compare the strings to see if they begin with an integer that can be compared and return the
|
||||
// result of that comparison.
|
||||
fn wcsfilecmp_leading_digits(a: &wstr, b: &wstr) -> (i32, usize, usize) {
|
||||
// Ignore leading 0s.
|
||||
let mut ai = a.as_char_slice().iter().take_while(|c| **c == '0').count();
|
||||
let mut bi = b.as_char_slice().iter().take_while(|c| **c == '0').count();
|
||||
|
||||
let mut ret = 0;
|
||||
loop {
|
||||
let ac = a.as_char_slice().get(ai).unwrap_or(&'\0');
|
||||
let bc = b.as_char_slice().get(bi).unwrap_or(&'\0');
|
||||
if ac.is_ascii_digit() && bc.is_ascii_digit() {
|
||||
// We keep the cmp value for the
|
||||
// first differing digit.
|
||||
//
|
||||
// If the numbers have the same length, that's the value.
|
||||
if ret == 0 {
|
||||
// Comparing the string value is the same as numerical
|
||||
// for wchar_t digits!
|
||||
if ac > bc {
|
||||
ret = 1;
|
||||
}
|
||||
if bc > ac {
|
||||
ret = -1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// We don't have negative numbers and we only allow ints,
|
||||
// and we have already skipped leading zeroes,
|
||||
// so the longer number is larger automatically.
|
||||
if ac.is_ascii_digit() {
|
||||
ret = 1;
|
||||
}
|
||||
if bc.is_ascii_digit() {
|
||||
ret = -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ai += 1;
|
||||
bi += 1;
|
||||
}
|
||||
|
||||
// For historical reasons, we skip trailing whitespace
|
||||
// like fish_wcstol does!
|
||||
// This is used in sorting globs, and that's supposed to be stable.
|
||||
ai += a
|
||||
.as_char_slice()
|
||||
.iter()
|
||||
.skip(ai)
|
||||
.take_while(|c| c.is_whitespace())
|
||||
.count();
|
||||
bi += b
|
||||
.as_char_slice()
|
||||
.iter()
|
||||
.skip(bi)
|
||||
.take_while(|c| c.is_whitespace())
|
||||
.count();
|
||||
(ret, ai, bi)
|
||||
}
|
||||
|
||||
/// Verify the behavior of the `wcsfilecmp()` function.
|
||||
#[test]
|
||||
fn test_wcsfilecmp() {
|
||||
use crate::wchar::L;
|
||||
use crate::wchar_ffi::wcharz;
|
||||
|
||||
macro_rules! validate {
|
||||
($str1:expr, $str2:expr, $expected_rc:expr) => {
|
||||
assert_eq!(
|
||||
wcsfilecmp(wcharz!(L!($str1)), wcharz!(L!($str2))),
|
||||
$expected_rc
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
// Not using L as suffix because the macro munges error locations.
|
||||
validate!("", "", 0);
|
||||
validate!("", "def", -1);
|
||||
validate!("abc", "", 1);
|
||||
validate!("abc", "def", -1);
|
||||
validate!("abc", "DEF", -1);
|
||||
validate!("DEF", "abc", 1);
|
||||
validate!("abc", "abc", 0);
|
||||
validate!("ABC", "ABC", 0);
|
||||
validate!("AbC", "abc", -1);
|
||||
validate!("AbC", "ABC", 1);
|
||||
validate!("def", "abc", 1);
|
||||
validate!("1ghi", "1gHi", 1);
|
||||
validate!("1ghi", "2ghi", -1);
|
||||
validate!("1ghi", "01ghi", 1);
|
||||
validate!("1ghi", "02ghi", -1);
|
||||
validate!("01ghi", "1ghi", -1);
|
||||
validate!("1ghi", "002ghi", -1);
|
||||
validate!("002ghi", "1ghi", 1);
|
||||
validate!("abc01def", "abc1def", -1);
|
||||
validate!("abc1def", "abc01def", 1);
|
||||
validate!("abc12", "abc5", 1);
|
||||
validate!("51abc", "050abc", 1);
|
||||
validate!("abc5", "abc12", -1);
|
||||
validate!("5abc", "12ABC", -1);
|
||||
validate!("abc0789", "abc789", -1);
|
||||
validate!("abc0xA789", "abc0xA0789", 1);
|
||||
validate!("abc002", "abc2", -1);
|
||||
validate!("abc002g", "abc002", 1);
|
||||
validate!("abc002g", "abc02g", -1);
|
||||
validate!("abc002.txt", "abc02.txt", -1);
|
||||
validate!("abc005", "abc012", -1);
|
||||
validate!("abc02", "abc002", 1);
|
||||
validate!("abc002.txt", "abc02.txt", -1);
|
||||
validate!("GHI1abc2.txt", "ghi1abc2.txt", -1);
|
||||
validate!("a0", "a00", -1);
|
||||
validate!("a00b", "a0b", -1);
|
||||
validate!("a0b", "a00b", 1);
|
||||
validate!("a-b", "azb", 1);
|
||||
}
|
|
@ -1589,62 +1589,6 @@ static void test_parse_util_cmdsubst_extent() {
|
|||
}
|
||||
}
|
||||
|
||||
static struct wcsfilecmp_test {
|
||||
const wchar_t *str1;
|
||||
const wchar_t *str2;
|
||||
int expected_rc;
|
||||
} wcsfilecmp_tests[] = {{L"", L"", 0},
|
||||
{L"", L"def", -1},
|
||||
{L"abc", L"", 1},
|
||||
{L"abc", L"def", -1},
|
||||
{L"abc", L"DEF", -1},
|
||||
{L"DEF", L"abc", 1},
|
||||
{L"abc", L"abc", 0},
|
||||
{L"ABC", L"ABC", 0},
|
||||
{L"AbC", L"abc", -1},
|
||||
{L"AbC", L"ABC", 1},
|
||||
{L"def", L"abc", 1},
|
||||
{L"1ghi", L"1gHi", 1},
|
||||
{L"1ghi", L"2ghi", -1},
|
||||
{L"1ghi", L"01ghi", 1},
|
||||
{L"1ghi", L"02ghi", -1},
|
||||
{L"01ghi", L"1ghi", -1},
|
||||
{L"1ghi", L"002ghi", -1},
|
||||
{L"002ghi", L"1ghi", 1},
|
||||
{L"abc01def", L"abc1def", -1},
|
||||
{L"abc1def", L"abc01def", 1},
|
||||
{L"abc12", L"abc5", 1},
|
||||
{L"51abc", L"050abc", 1},
|
||||
{L"abc5", L"abc12", -1},
|
||||
{L"5abc", L"12ABC", -1},
|
||||
{L"abc0789", L"abc789", -1},
|
||||
{L"abc0xA789", L"abc0xA0789", 1},
|
||||
{L"abc002", L"abc2", -1},
|
||||
{L"abc002g", L"abc002", 1},
|
||||
{L"abc002g", L"abc02g", -1},
|
||||
{L"abc002.txt", L"abc02.txt", -1},
|
||||
{L"abc005", L"abc012", -1},
|
||||
{L"abc02", L"abc002", 1},
|
||||
{L"abc002.txt", L"abc02.txt", -1},
|
||||
{L"GHI1abc2.txt", L"ghi1abc2.txt", -1},
|
||||
{L"a0", L"a00", -1},
|
||||
{L"a00b", L"a0b", -1},
|
||||
{L"a0b", L"a00b", 1},
|
||||
{L"a-b", L"azb", 1},
|
||||
{nullptr, nullptr, 0}};
|
||||
|
||||
/// Verify the behavior of the `wcsfilecmp()` function.
|
||||
static void test_wcsfilecmp() {
|
||||
for (auto test = wcsfilecmp_tests; test->str1; test++) {
|
||||
int rc = wcsfilecmp(test->str1, test->str2);
|
||||
if (rc != test->expected_rc) {
|
||||
err(L"New failed on line %lu: [\"%ls\" <=> \"%ls\"]: "
|
||||
L"expected return code %d but got %d",
|
||||
__LINE__, test->str1, test->str2, test->expected_rc, rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_const_strlen() {
|
||||
do_test(const_strlen("") == 0);
|
||||
do_test(const_strlen(L"") == 0);
|
||||
|
@ -1788,7 +1732,6 @@ void test_dir_iter() {
|
|||
|
||||
static void test_utility_functions() {
|
||||
say(L"Testing utility functions");
|
||||
test_wcsfilecmp();
|
||||
test_parse_util_cmdsubst_extent();
|
||||
test_const_strlen();
|
||||
test_const_strcmp();
|
||||
|
|
199
src/util.cpp
199
src/util.cpp
|
@ -1,199 +0,0 @@
|
|||
// Generic utilities library.
|
||||
#include "config.h" // IWYU pragma: keep
|
||||
|
||||
#include "util.h"
|
||||
|
||||
#include <stddef.h>
|
||||
#include <sys/time.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#include <cwchar>
|
||||
|
||||
#include "common.h"
|
||||
#include "fallback.h" // IWYU pragma: keep
|
||||
#include "wutil.h" // IWYU pragma: keep
|
||||
|
||||
// Compare the strings to see if they begin with an integer that can be compared and return the
|
||||
// result of that comparison.
|
||||
static int wcsfilecmp_leading_digits(const wchar_t **a, const wchar_t **b) {
|
||||
const wchar_t *a1 = *a;
|
||||
const wchar_t *b1 = *b;
|
||||
|
||||
// Ignore leading 0s.
|
||||
while (*a1 == L'0') a1++;
|
||||
while (*b1 == L'0') b1++;
|
||||
|
||||
int ret = 0;
|
||||
|
||||
while (true) {
|
||||
if (iswdigit(*a1) && iswdigit(*b1)) {
|
||||
// We keep the cmp value for the
|
||||
// first differing digit.
|
||||
//
|
||||
// If the numbers have the same length, that's the value.
|
||||
if (ret == 0) {
|
||||
// Comparing the string value is the same as numerical
|
||||
// for wchar_t digits!
|
||||
if (*a1 > *b1) ret = 1;
|
||||
if (*b1 > *a1) ret = -1;
|
||||
}
|
||||
} else {
|
||||
// We don't have negative numbers and we only allow ints,
|
||||
// and we have already skipped leading zeroes,
|
||||
// so the longer number is larger automatically.
|
||||
if (iswdigit(*a1)) ret = 1;
|
||||
if (iswdigit(*b1)) ret = -1;
|
||||
break;
|
||||
}
|
||||
a1++;
|
||||
b1++;
|
||||
}
|
||||
|
||||
// For historical reasons, we skip trailing whitespace
|
||||
// like fish_wcstol does!
|
||||
// This is used in sorting globs, and that's supposed to be stable.
|
||||
while (iswspace(*a1)) a1++;
|
||||
while (iswspace(*b1)) b1++;
|
||||
*a = a1;
|
||||
*b = b1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/// Compare two strings, representing file names, using "natural" ordering. This means that letter
|
||||
/// case is ignored. It also means that integers in each string are compared based on the decimal
|
||||
/// value rather than the string representation. It only handles base 10 integers and they can
|
||||
/// appear anywhere in each string, including multiple integers. This means that a file name like
|
||||
/// "0xAF0123" is treated as the literal "0xAF" followed by the integer 123.
|
||||
///
|
||||
/// The intent is to ensure that file names like "file23" and "file5" are sorted so that the latter
|
||||
/// appears before the former.
|
||||
///
|
||||
/// This does not handle esoterica like Unicode combining characters. Nor does it use collating
|
||||
/// sequences. Which means that an ASCII "A" will be less than an equivalent character with a higher
|
||||
/// Unicode code point. In part because doing so is really hard without the help of something like
|
||||
/// the ICU library. But also because file names might be in a different encoding than is used by
|
||||
/// the current fish process which results in weird situations. This is basically a best effort
|
||||
/// implementation that will do the right thing 99.99% of the time.
|
||||
///
|
||||
/// Returns: -1 if a < b, 0 if a == b, 1 if a > b.
|
||||
int wcsfilecmp(const wchar_t *a, const wchar_t *b) {
|
||||
assert(a && b && "Null parameter");
|
||||
const wchar_t *orig_a = a;
|
||||
const wchar_t *orig_b = b;
|
||||
int retval = 0; // assume the strings will be equal
|
||||
|
||||
while (*a && *b) {
|
||||
if (iswdigit(*a) && iswdigit(*b)) {
|
||||
retval = wcsfilecmp_leading_digits(&a, &b);
|
||||
// If we know the strings aren't logically equal or we've reached the end of one or both
|
||||
// strings we can stop iterating over the chars in each string.
|
||||
if (retval || *a == 0 || *b == 0) break;
|
||||
}
|
||||
|
||||
// Fast path: Skip towupper.
|
||||
if (*a == *b) {
|
||||
a++;
|
||||
b++;
|
||||
continue;
|
||||
}
|
||||
|
||||
wint_t al = towupper(*a);
|
||||
wint_t bl = towupper(*b);
|
||||
// Sort dashes after Z - see #5634
|
||||
if (al == L'-') al = L'[';
|
||||
if (bl == L'-') bl = L'[';
|
||||
|
||||
if (al < bl) {
|
||||
retval = -1;
|
||||
break;
|
||||
} else if (al > bl) {
|
||||
retval = 1;
|
||||
break;
|
||||
} else {
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
}
|
||||
|
||||
if (retval != 0) return retval; // we already know the strings aren't logically equal
|
||||
|
||||
if (*a == 0) {
|
||||
if (*b == 0) {
|
||||
// The strings are logically equal. They may or may not be the same length depending on
|
||||
// whether numbers were present but that doesn't matter. Disambiguate strings that
|
||||
// differ by letter case or length. We don't bother optimizing the case where the file
|
||||
// names are literally identical because that won't occur given how this function is
|
||||
// used. And even if it were to occur (due to being reused in some other context) it
|
||||
// would be so rare that it isn't worth optimizing for.
|
||||
retval = std::wcscmp(orig_a, orig_b);
|
||||
return retval < 0 ? -1 : retval == 0 ? 0 : 1;
|
||||
}
|
||||
return -1; // string a is a prefix of b and b is longer
|
||||
}
|
||||
|
||||
assert(*b == 0);
|
||||
return 1; // string b is a prefix of a and a is longer
|
||||
}
|
||||
|
||||
/// wcsfilecmp, but frozen in time for glob usage.
|
||||
int wcsfilecmp_glob(const wchar_t *a, const wchar_t *b) {
|
||||
assert(a && b && "Null parameter");
|
||||
const wchar_t *orig_a = a;
|
||||
const wchar_t *orig_b = b;
|
||||
int retval = 0; // assume the strings will be equal
|
||||
|
||||
while (*a && *b) {
|
||||
if (iswdigit(*a) && iswdigit(*b)) {
|
||||
retval = wcsfilecmp_leading_digits(&a, &b);
|
||||
// If we know the strings aren't logically equal or we've reached the end of one or both
|
||||
// strings we can stop iterating over the chars in each string.
|
||||
if (retval || *a == 0 || *b == 0) break;
|
||||
}
|
||||
|
||||
// Fast path: Skip towlower.
|
||||
if (*a == *b) {
|
||||
a++;
|
||||
b++;
|
||||
continue;
|
||||
}
|
||||
|
||||
wint_t al = towlower(*a);
|
||||
wint_t bl = towlower(*b);
|
||||
if (al < bl) {
|
||||
retval = -1;
|
||||
break;
|
||||
} else if (al > bl) {
|
||||
retval = 1;
|
||||
break;
|
||||
} else {
|
||||
a++;
|
||||
b++;
|
||||
}
|
||||
}
|
||||
|
||||
if (retval != 0) return retval; // we already know the strings aren't logically equal
|
||||
|
||||
if (*a == 0) {
|
||||
if (*b == 0) {
|
||||
// The strings are logically equal. They may or may not be the same length depending on
|
||||
// whether numbers were present but that doesn't matter. Disambiguate strings that
|
||||
// differ by letter case or length. We don't bother optimizing the case where the file
|
||||
// names are literally identical because that won't occur given how this function is
|
||||
// used. And even if it were to occur (due to being reused in some other context) it
|
||||
// would be so rare that it isn't worth optimizing for.
|
||||
retval = wcscmp(orig_a, orig_b);
|
||||
return retval < 0 ? -1 : retval == 0 ? 0 : 1;
|
||||
}
|
||||
return -1; // string a is a prefix of b and b is longer
|
||||
}
|
||||
|
||||
assert(*b == 0);
|
||||
return 1; // string b is a prefix of a and a is longer
|
||||
}
|
||||
|
||||
/// Return microseconds since the epoch.
|
||||
long long get_time() {
|
||||
struct timeval time_struct;
|
||||
gettimeofday(&time_struct, nullptr);
|
||||
return 1000000LL * time_struct.tv_sec + time_struct.tv_usec;
|
||||
}
|
41
src/util.h
41
src/util.h
|
@ -1,40 +1,17 @@
|
|||
// Generic utilities library.
|
||||
#ifndef FISH_UTIL_H
|
||||
#define FISH_UTIL_H
|
||||
|
||||
/// Compares two wide character strings with an (arguably) intuitive ordering. This function tries
|
||||
/// to order strings in a way which is intuitive to humans with regards to sorting strings
|
||||
/// containing numbers.
|
||||
///
|
||||
/// Most sorting functions would sort the strings 'file1.txt' 'file5.txt' and 'file12.txt' as:
|
||||
///
|
||||
/// file1.txt
|
||||
/// file12.txt
|
||||
/// file5.txt
|
||||
///
|
||||
/// This function regards any sequence of digits as a single entity when performing comparisons, so
|
||||
/// the output is instead:
|
||||
///
|
||||
/// file1.txt
|
||||
/// file5.txt
|
||||
/// file12.txt
|
||||
///
|
||||
/// Which most people would find more intuitive.
|
||||
///
|
||||
/// This won't return the optimum results for numbers in bases higher than ten, such as hexadecimal,
|
||||
/// but at least a stable sort order will result.
|
||||
///
|
||||
/// This function performs a two-tiered sort, where difference in case and in number of leading
|
||||
/// zeroes in numbers only have effect if no other differences between strings are found. This way,
|
||||
/// a 'file1' and 'File1' will not be considered identical, and hence their internal sort order is
|
||||
/// not arbitrary, but the names 'file1', 'File2' and 'file3' will still be sorted in the order
|
||||
/// given above.
|
||||
#if INCLUDE_RUST_HEADERS
|
||||
|
||||
#include "util.rs.h"
|
||||
|
||||
#else
|
||||
|
||||
// Hacks to allow us to compile without Rust headers.
|
||||
int wcsfilecmp(const wchar_t *a, const wchar_t *b);
|
||||
|
||||
/// wcsfilecmp, but frozen in time for glob usage.
|
||||
int wcsfilecmp_glob(const wchar_t *a, const wchar_t *b);
|
||||
|
||||
/// Get the current time in microseconds since Jan 1, 1970.
|
||||
long long get_time();
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in a new issue