2016-05-03 22:18:24 +00:00
|
|
|
// Generic utilities library.
|
2016-05-18 22:30:21 +00:00
|
|
|
#include "config.h" // IWYU pragma: keep
|
|
|
|
|
2019-10-13 22:50:48 +00:00
|
|
|
#include "util.h"
|
|
|
|
|
2016-05-03 22:18:24 +00:00
|
|
|
#include <errno.h>
|
2017-05-13 03:15:24 +00:00
|
|
|
#include <stddef.h>
|
2016-05-03 22:18:24 +00:00
|
|
|
#include <sys/time.h>
|
2005-09-20 13:26:39 +00:00
|
|
|
#include <wctype.h>
|
2019-10-13 22:50:48 +00:00
|
|
|
|
2019-05-05 10:09:25 +00:00
|
|
|
#include <cwchar>
|
2005-09-20 13:26:39 +00:00
|
|
|
|
2016-05-03 22:18:24 +00:00
|
|
|
#include "common.h"
|
2016-04-21 06:00:54 +00:00
|
|
|
#include "fallback.h" // IWYU pragma: keep
|
2019-10-13 22:50:48 +00:00
|
|
|
#include "wutil.h" // IWYU pragma: keep
|
2005-09-20 13:26:39 +00:00
|
|
|
|
2017-05-13 03:15:24 +00:00
|
|
|
// Numerically compare the runs of decimal digits at the start of *a and *b.
//
// Leading zeroes are ignored. Both pointers are then advanced in lock step while both point at a
// digit; if one number has more digits than the other, its pointer is left on the first unmatched
// digit. Finally any whitespace at the resulting positions is skipped and the positions are
// written back through a and b.
//
// Returns -1 if the number in *a is smaller, 1 if it is larger, and 0 if both are equal.
static int wcsfilecmp_leading_digits(const wchar_t **a, const wchar_t **b) {
    const wchar_t *lhs = *a;
    const wchar_t *rhs = *b;

    // Leading zeroes carry no numeric weight.
    for (; *lhs == L'0'; ++lhs) {
    }
    for (; *rhs == L'0'; ++rhs) {
    }

    // Walk both digit runs together. Only the first differing digit matters, and it decides the
    // result only if both runs turn out to have the same length. Comparing the wchar_t values
    // directly gives the same answer as comparing the digits numerically.
    int order = 0;
    for (; iswdigit(*lhs) && iswdigit(*rhs); ++lhs, ++rhs) {
        if (order != 0) continue;
        if (*lhs > *rhs) {
            order = 1;
        } else if (*lhs < *rhs) {
            order = -1;
        }
    }

    // There are no negative numbers and leading zeroes are already gone, so whichever side still
    // has digits left is the longer, and therefore larger, number.
    if (iswdigit(*lhs)) order = 1;
    if (iswdigit(*rhs)) order = -1;

    // Whitespace after the number is skipped for historical reasons (like fish_wcstol does).
    // This feeds the sorting of globs, whose order is supposed to stay stable.
    for (; iswspace(*lhs); ++lhs) {
    }
    for (; iswspace(*rhs); ++rhs) {
    }

    *a = lhs;
    *b = rhs;
    return order;
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2017-05-13 03:15:24 +00:00
|
|
|
/// Compare two strings, representing file names, using "natural" ordering. This means that letter
|
|
|
|
/// case is ignored. It also means that integers in each string are compared based on the decimal
|
|
|
|
/// value rather than the string representation. It only handles base 10 integers and they can
|
|
|
|
/// appear anywhere in each string, including multiple integers. This means that a file name like
|
|
|
|
/// "0xAF0123" is treated as the literal "0xAF" followed by the integer 123.
|
|
|
|
///
|
|
|
|
/// The intent is to ensure that file names like "file23" and "file5" are sorted so that the latter
|
|
|
|
/// appears before the former.
|
|
|
|
///
|
|
|
|
/// This does not handle esoterica like Unicode combining characters. Nor does it use collating
|
|
|
|
/// sequences. Which means that an ASCII "A" will be less than an equivalent character with a higher
|
|
|
|
/// Unicode code point. In part because doing so is really hard without the help of something like
|
|
|
|
/// the ICU library. But also because file names might be in a different encoding than is used by
|
|
|
|
/// the current fish process which results in weird situations. This is basically a best effort
|
|
|
|
/// implementation that will do the right thing 99.99% of the time.
|
|
|
|
///
|
|
|
|
/// Returns: -1 if a < b, 0 if a == b, 1 if a > b.
|
|
|
|
int wcsfilecmp(const wchar_t *a, const wchar_t *b) {
|
2019-05-28 00:24:19 +00:00
|
|
|
assert(a && b && "Null parameter");
|
2017-05-13 03:15:24 +00:00
|
|
|
const wchar_t *orig_a = a;
|
|
|
|
const wchar_t *orig_b = b;
|
|
|
|
int retval = 0; // assume the strings will be equal
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2017-05-13 03:15:24 +00:00
|
|
|
while (*a && *b) {
|
2021-10-04 23:08:18 +00:00
|
|
|
if (iswdigit(*a) && iswdigit(*b)) {
|
2017-05-13 03:15:24 +00:00
|
|
|
retval = wcsfilecmp_leading_digits(&a, &b);
|
|
|
|
// If we know the strings aren't logically equal or we've reached the end of one or both
|
|
|
|
// strings we can stop iterating over the chars in each string.
|
|
|
|
if (retval || *a == 0 || *b == 0) break;
|
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
|
2021-09-30 15:47:08 +00:00
|
|
|
// Fast path: Skip towupper.
|
|
|
|
if (*a == *b) {
|
|
|
|
a++;
|
|
|
|
b++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2019-03-16 07:54:44 +00:00
|
|
|
wint_t al = towupper(*a);
|
|
|
|
wint_t bl = towupper(*b);
|
2019-05-28 14:04:31 +00:00
|
|
|
// Sort dashes after Z - see #5634
|
|
|
|
if (al == L'-') al = L'[';
|
|
|
|
if (bl == L'-') bl = L'[';
|
2019-03-16 08:08:21 +00:00
|
|
|
|
2017-05-13 03:15:24 +00:00
|
|
|
if (al < bl) {
|
|
|
|
retval = -1;
|
|
|
|
break;
|
|
|
|
} else if (al > bl) {
|
|
|
|
retval = 1;
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
a++;
|
|
|
|
b++;
|
|
|
|
}
|
2012-11-19 00:30:30 +00:00
|
|
|
}
|
|
|
|
|
2017-05-13 03:15:24 +00:00
|
|
|
if (retval != 0) return retval; // we already know the strings aren't logically equal
|
2012-11-18 10:23:22 +00:00
|
|
|
|
2017-05-13 03:15:24 +00:00
|
|
|
if (*a == 0) {
|
|
|
|
if (*b == 0) {
|
|
|
|
// The strings are logically equal. They may or may not be the same length depending on
|
|
|
|
// whether numbers were present but that doesn't matter. Disambiguate strings that
|
|
|
|
// differ by letter case or length. We don't bother optimizing the case where the file
|
|
|
|
// names are literally identical because that won't occur given how this function is
|
|
|
|
// used. And even if it were to occur (due to being reused in some other context) it
|
|
|
|
// would be so rare that it isn't worth optimizing for.
|
2019-03-12 21:06:01 +00:00
|
|
|
retval = std::wcscmp(orig_a, orig_b);
|
2017-05-13 03:15:24 +00:00
|
|
|
return retval < 0 ? -1 : retval == 0 ? 0 : 1;
|
|
|
|
}
|
|
|
|
return -1; // string a is a prefix of b and b is longer
|
2012-11-18 10:23:22 +00:00
|
|
|
}
|
|
|
|
|
2017-05-13 03:15:24 +00:00
|
|
|
assert(*b == 0);
|
|
|
|
return 1; // string b is a prefix of a and a is longer
|
2005-09-20 13:26:39 +00:00
|
|
|
}
|
|
|
|
|
2020-02-14 17:44:54 +00:00
|
|
|
/// wcsfilecmp, but frozen in time for glob usage.
|
|
|
|
int wcsfilecmp_glob(const wchar_t *a, const wchar_t *b) {
|
|
|
|
assert(a && b && "Null parameter");
|
|
|
|
const wchar_t *orig_a = a;
|
|
|
|
const wchar_t *orig_b = b;
|
|
|
|
int retval = 0; // assume the strings will be equal
|
|
|
|
|
|
|
|
while (*a && *b) {
|
2021-10-04 23:08:18 +00:00
|
|
|
if (iswdigit(*a) && iswdigit(*b)) {
|
2020-02-14 17:44:54 +00:00
|
|
|
retval = wcsfilecmp_leading_digits(&a, &b);
|
|
|
|
// If we know the strings aren't logically equal or we've reached the end of one or both
|
|
|
|
// strings we can stop iterating over the chars in each string.
|
|
|
|
if (retval || *a == 0 || *b == 0) break;
|
|
|
|
}
|
|
|
|
|
2021-09-30 15:47:08 +00:00
|
|
|
// Fast path: Skip towlower.
|
|
|
|
if (*a == *b) {
|
|
|
|
a++;
|
|
|
|
b++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2020-02-14 17:44:54 +00:00
|
|
|
wint_t al = towlower(*a);
|
|
|
|
wint_t bl = towlower(*b);
|
|
|
|
if (al < bl) {
|
|
|
|
retval = -1;
|
|
|
|
break;
|
|
|
|
} else if (al > bl) {
|
|
|
|
retval = 1;
|
|
|
|
break;
|
|
|
|
} else {
|
|
|
|
a++;
|
|
|
|
b++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (retval != 0) return retval; // we already know the strings aren't logically equal
|
|
|
|
|
|
|
|
if (*a == 0) {
|
|
|
|
if (*b == 0) {
|
|
|
|
// The strings are logically equal. They may or may not be the same length depending on
|
|
|
|
// whether numbers were present but that doesn't matter. Disambiguate strings that
|
|
|
|
// differ by letter case or length. We don't bother optimizing the case where the file
|
|
|
|
// names are literally identical because that won't occur given how this function is
|
|
|
|
// used. And even if it were to occur (due to being reused in some other context) it
|
|
|
|
// would be so rare that it isn't worth optimizing for.
|
|
|
|
retval = wcscmp(orig_a, orig_b);
|
|
|
|
return retval < 0 ? -1 : retval == 0 ? 0 : 1;
|
|
|
|
}
|
|
|
|
return -1; // string a is a prefix of b and b is longer
|
|
|
|
}
|
|
|
|
|
|
|
|
assert(*b == 0);
|
|
|
|
return 1; // string b is a prefix of a and a is longer
|
|
|
|
}
|
|
|
|
|
2017-05-13 03:15:24 +00:00
|
|
|
/// Return the current time, as reported by gettimeofday(), in microseconds since the epoch.
/// The return value of gettimeofday() itself is not checked.
long long get_time() {
    struct timeval now;
    gettimeofday(&now, nullptr);

    // Scale whole seconds to microseconds in long long before adding the sub-second part.
    const long long seconds_in_usec = 1000000LL * now.tv_sec;
    return seconds_in_usec + now.tv_usec;
}
|