fish-shell/src/util.cpp
Aaron Gyes 14d2a6d8ff IWYU-guided #include rejiggering.
Let's hope this doesn't cause build failures for e.g. musl: I just
know it's good on macOS and our Linux CI.

It's been a long time.

One fix this brings, is I discovered we #include assert.h or cassert
in a lot of places. If those ever happen to be in a file that doesn't
include common.h, or we are before common.h gets included, we're
unknowingly working with the system 'assert' macro again, which
may get disabled for debug builds or at least has different
behavior on crash. We undef 'assert' and redefine it in common.h.

Those were all eliminated, except in one catch-22 spot for
maybe.h: it can't include common.h. A fix might be to
make a fish_assert.h that *usually* common.h exports.
2022-08-20 23:55:18 -07:00

199 lines
7 KiB
C++

// Generic utilities library.
#include "config.h" // IWYU pragma: keep
#include "util.h"
#include <stddef.h>
#include <sys/time.h>
#include <wctype.h>
#include <cwchar>
#include "common.h"
#include "fallback.h" // IWYU pragma: keep
#include "wutil.h" // IWYU pragma: keep
// Compare the unsigned decimal integers found at the start of *a and *b.
//
// Leading zeros are skipped first, so they do not influence the result. Returns 1 if the number
// in *a is larger, -1 if the number in *b is larger, and 0 if they are numerically equal.
//
// On return *a and *b are moved to where scanning stopped, past any whitespace found there. If
// one number has more digits than the other, its pointer is left on the first unmatched digit.
static int wcsfilecmp_leading_digits(const wchar_t **a, const wchar_t **b) {
    const wchar_t *lhs = *a;
    const wchar_t *rhs = *b;

    // Zeros in front of a number carry no weight.
    for (; *lhs == L'0'; ++lhs) {
    }
    for (; *rhs == L'0'; ++rhs) {
    }

    int result = 0;
    for (;; ++lhs, ++rhs) {
        const int lhs_is_digit = iswdigit(*lhs);
        const int rhs_is_digit = iswdigit(*rhs);

        if (!(lhs_is_digit && rhs_is_digit)) {
            // At least one number has ended. There are no negative numbers and the leading
            // zeros are already gone, so whichever side still has digits is the larger one.
            // If neither has digits left, the numbers have the same length and the verdict
            // recorded below (if any) stands.
            if (lhs_is_digit) result = 1;
            if (rhs_is_digit) result = -1;
            break;
        }

        // Only the first differing digit is recorded; it decides the ordering when both
        // numbers turn out to have the same length. Digit characters order like their values.
        if (result == 0 && *lhs != *rhs) {
            result = (*lhs > *rhs) ? 1 : -1;
        }
    }

    // For historical reasons whitespace after the number is skipped, like fish_wcstol does.
    // Glob sorting relies on this and is supposed to stay stable.
    for (; iswspace(*lhs); ++lhs) {
    }
    for (; iswspace(*rhs); ++rhs) {
    }

    *a = lhs;
    *b = rhs;
    return result;
}
/// "Natural" ordering comparison for two file names.
///
/// Letter case is ignored while scanning, and any run of decimal digits present at the same
/// position in both strings is compared by numeric value instead of character by character. Only
/// base 10 is understood. Numbers may occur anywhere and more than once, so "0xAF0123" is read as
/// the literal text "0xAF" followed by the integer 123. The point is that "file5" sorts before
/// "file23".
///
/// A dash is ordered as if it were '[', which places it after 'Z' - see #5634.
///
/// No attempt is made to handle Unicode combining characters or collating sequences, so an ASCII
/// "A" is less than an equivalent character with a higher code point. Doing better is really hard
/// without something like the ICU library, and file names may not even use the same encoding as
/// the running fish process. This is a best-effort implementation that does the right thing
/// 99.99% of the time.
///
/// Returns: -1 if a < b, 0 if a == b, 1 if a > b.
int wcsfilecmp(const wchar_t *a, const wchar_t *b) {
    assert(a && b && "Null parameter");
    const wchar_t *const a_start = a;
    const wchar_t *const b_start = b;

    int order = 0;  // stays 0 for as long as the strings look equal
    while (*a != L'\0' && *b != L'\0') {
        if (iswdigit(*a) && iswdigit(*b)) {
            order = wcsfilecmp_leading_digits(&a, &b);
            // Stop once the numbers differ or either string has been used up.
            if (order != 0 || *a == L'\0' || *b == L'\0') break;
        }

        // Identical characters need no case folding; only fold when they differ.
        if (*a != *b) {
            wint_t key_a = towupper(*a);
            wint_t key_b = towupper(*b);
            // Sort dashes after Z - see #5634
            if (key_a == L'-') key_a = L'[';
            if (key_b == L'-') key_b = L'[';
            if (key_a != key_b) {
                order = (key_a < key_b) ? -1 : 1;
                break;
            }
        }
        a++;
        b++;
    }

    // A difference was found while scanning.
    if (order != 0) return order;

    if (*a != L'\0') {
        // b ran out first: it is a prefix of the longer a.
        assert(*b == 0);
        return 1;
    }
    if (*b != L'\0') {
        // a ran out first: it is a prefix of the longer b.
        return -1;
    }

    // Both strings ended and compared as logically equal. They can still differ in letter case
    // or in length (e.g. leading zeros), so use a plain comparison of the original strings as
    // the tie-breaker. Literally identical names are not special-cased; that is not expected to
    // happen given how this function is used, and would be too rare to be worth optimizing.
    const int raw = std::wcscmp(a_start, b_start);
    return (raw > 0) - (raw < 0);
}
/// The wcsfilecmp algorithm, frozen in time for glob usage.
///
/// Digit runs present at the same position in both strings are compared numerically; all other
/// characters are compared after folding with towlower. Unlike wcsfilecmp, dashes get no special
/// treatment here.
///
/// Returns: -1 if a < b, 0 if a == b, 1 if a > b.
int wcsfilecmp_glob(const wchar_t *a, const wchar_t *b) {
    assert(a && b && "Null parameter");
    const wchar_t *const first_a = a;
    const wchar_t *const first_b = b;

    int verdict = 0;  // 0 means "no difference seen so far"
    for (; *a && *b; a++, b++) {
        if (iswdigit(*a) && iswdigit(*b)) {
            verdict = wcsfilecmp_leading_digits(&a, &b);
            // Nothing more to scan if the numbers differ or a string has ended.
            if (verdict || !*a || !*b) break;
        }

        // Fast path: identical characters don't need towlower.
        if (*a == *b) continue;

        const wint_t folded_a = towlower(*a);
        const wint_t folded_b = towlower(*b);
        if (folded_a < folded_b) {
            verdict = -1;
            break;
        }
        if (folded_a > folded_b) {
            verdict = 1;
            break;
        }
    }

    // A difference was found while scanning.
    if (verdict != 0) return verdict;

    if (*a != 0) {
        // Only b is exhausted, so b is a prefix of the longer a.
        assert(*b == 0);
        return 1;
    }
    if (*b != 0) {
        // Only a is exhausted, so a is a prefix of the longer b.
        return -1;
    }

    // Logically equal strings may still differ by letter case or by length (when numbers were
    // involved). Break the tie with a plain comparison of the original strings. Literally
    // identical names are not special-cased because that is not expected given how this
    // function is used.
    const int raw = wcscmp(first_a, first_b);
    if (raw < 0) return -1;
    if (raw > 0) return 1;
    return 0;
}
/// Return the current time, as reported by gettimeofday, in microseconds since the epoch.
long long get_time() {
    const long long usec_per_sec = 1000000LL;

    struct timeval now;
    gettimeofday(&now, nullptr);

    // Widen the seconds to long long before scaling, then add the sub-second microseconds.
    return usec_per_sec * now.tv_sec + now.tv_usec;
}