fish-shell/src/util.cpp

// Generic utilities library.
#include "config.h"  // IWYU pragma: keep

#include <errno.h>
#include <stddef.h>
#include <sys/time.h>
#include <wchar.h>
#include <wctype.h>

#include "common.h"
#include "fallback.h"  // IWYU pragma: keep
#include "util.h"
#include "wutil.h"  // IWYU pragma: keep

// Numeric-prefix comparison helper for wcsfilecmp(). The text at *a and at *b is parsed as a base
// 10 integer via fish_wcstol().
//
// Returns -1 or 1 when the two parsed values differ; *a and *b are left untouched in that case.
// Returns 0 with *a and *b advanced past the parsed text when the two values are equal.
// Returns 0 without moving either pointer when errno is positive after either parse, which lets
// the caller fall back to a plain character comparison.
static int wcsfilecmp_leading_digits(const wchar_t **a, const wchar_t **b) {
    // NOTE(review): only errno > 0 is treated as a failed parse. Presumably fish_wcstol() uses a
    // non-positive errno for conditions that still yield a usable number (such as trailing
    // non-digit text) -- confirm against its definition in wutil.
    const wchar_t *after_lhs;
    const long lhs = fish_wcstol(*a, &after_lhs, 10);
    if (errno > 0) return 0;  // left side is not a usable number; right side is never parsed

    const wchar_t *after_rhs;
    const long rhs = fish_wcstol(*b, &after_rhs, 10);
    if (errno > 0) return 0;  // right side is not a usable number

    if (lhs != rhs) return lhs < rhs ? -1 : 1;

    // Equal values: step over both numbers so the caller resumes with whatever follows them.
    *a = after_lhs;
    *b = after_rhs;
    return 0;
}

/// "Natural" ordering comparison for two strings that represent file names.
///
/// Letter case is ignored on the first pass, and runs of decimal digits are compared by numeric
/// value instead of character by character. Only base 10 integers are recognized; they may appear
/// anywhere in a string and more than once. A name such as "0xAF0123" is therefore seen as the
/// literal text "0xAF" followed by the integer 123. The point is that "file5" sorts ahead of
/// "file23".
///
/// Strings that are equal under those rules (for example they differ only by letter case, or by
/// leading zeros in a number) are disambiguated with a plain wcscmp() of the original strings.
///
/// Limitations: Unicode combining characters are not handled and no collating sequence is used, so
/// an ASCII "A" sorts before an equivalent character with a higher code point. Doing better is
/// hard without something like the ICU library, and file names may not even be in the encoding
/// used by the current process. Treat this as a best effort implementation.
///
/// Returns: -1 if a < b, 0 if a == b, 1 if a > b.
int wcsfilecmp(const wchar_t *a, const wchar_t *b) {
    CHECK(a, 0);
    CHECK(b, 0);
    // Remember where each string starts; the tie-break at the bottom needs the full text.
    const wchar_t *const a_start = a;
    const wchar_t *const b_start = b;

    while (*a != 0 && *b != 0) {
        if (iswdigit(*a) && iswdigit(*b)) {
            const int num_cmp = wcsfilecmp_leading_digits(&a, &b);
            if (num_cmp != 0) return num_cmp;  // the embedded numbers differ
            // The helper may have consumed the remainder of one or both strings.
            if (*a == 0 || *b == 0) break;
        }

        // Case-insensitive comparison of the current pair of characters.
        const wint_t a_lower = towlower(*a);
        const wint_t b_lower = towlower(*b);
        if (a_lower != b_lower) return a_lower < b_lower ? -1 : 1;

        a++;
        b++;
    }

    // No difference was found before at least one string ran out.
    if (*a != 0) {
        assert(*b == 0);
        return 1;  // string b is a prefix of a and a is longer
    }
    if (*b != 0) return -1;  // string a is a prefix of b and b is longer

    // Both strings were consumed, so they are logically equal. They may still differ in letter
    // case or in length (when numbers were present), so order them by their literal contents.
    // Literally identical inputs are not special-cased; they simply compare equal here.
    const int exact = wcscmp(a_start, b_start);
    return (exact > 0) - (exact < 0);
}

/// Return the current time, as reported by gettimeofday(), in microseconds since the epoch.
/// The return value of gettimeofday() itself is not checked.
long long get_time() {
    struct timeval now;
    gettimeofday(&now, NULL);

    // Widen the seconds to long long before scaling them to microseconds.
    long long micros = now.tv_sec;
    micros *= 1000000ll;
    micros += now.tv_usec;
    return micros;
}
restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-03 22:18:24 +00:00			`// Generic utilities library.`
add (or restore) config.h to all files The autoconf-generated config.h contains a number of directives which may alter the behaviour of system headers on certain platforms. Always include it in every C++ file as the first include. Closes #2993. 2016-05-18 22:30:21 +00:00			`#include "config.h" // IWYU pragma: keep`

restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-03 22:18:24 +00:00			`#include <errno.h>`
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`#include <stddef.h>`
restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-03 22:18:24 +00:00			`#include <sys/time.h>`
Initial revision darcs-hash:20050920132639-ac50b-fa3b476891e1f5f67207cf4cc7bf623834cc5edc.gz 2005-09-20 13:26:39 +00:00			`#include <wchar.h>`
			`#include <wctype.h>`

restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-03 22:18:24 +00:00			`#include "common.h"`
add better support for IWYU and fix things Remove the "make iwyu" build target. Move the functionality into the recently introduced lint.fish script. Fix a lot, but not all, of the include-what-you-use errors. Specifically, it fixes all of the IWYU errors on my OS X server but only removes some of them on my Ubuntu 14.04 server. Fixes #2957 2016-04-21 06:00:54 +00:00			`#include "fallback.h" // IWYU pragma: keep`
Initial revision darcs-hash:20050920132639-ac50b-fa3b476891e1f5f67207cf4cc7bf623834cc5edc.gz 2005-09-20 13:26:39 +00:00			`#include "util.h"`
add better support for IWYU and fix things Remove the "make iwyu" build target. Move the functionality into the recently introduced lint.fish script. Fix a lot, but not all, of the include-what-you-use errors. Specifically, it fixes all of the IWYU errors on my OS X server but only removes some of them on my Ubuntu 14.04 server. Fixes #2957 2016-04-21 06:00:54 +00:00			`#include "wutil.h" // IWYU pragma: keep`
Initial revision darcs-hash:20050920132639-ac50b-fa3b476891e1f5f67207cf4cc7bf623834cc5edc.gz 2005-09-20 13:26:39 +00:00
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`// Compare the strings to see if they begin with an integer that can be compared and return the`
			`// result of that comparison.`
			`static int wcsfilecmp_leading_digits(const wchar_t **a, const wchar_t **b) {`
			`const wchar_t *a_end, *b_end;`
Apply new indentation, brace, and whitespace style 2012-11-19 00:30:30 +00:00
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`long a_num = fish_wcstol(*a, &a_end, 10);`
			`if (errno > 0) return 0; // invalid number -- fallback to simple string compare`
			`long b_num = fish_wcstol(*b, &b_end, 10);`
			`if (errno > 0) return 0; // invalid number -- fallback to simple string compare`
Apply new indentation, brace, and whitespace style 2012-11-19 00:30:30 +00:00
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`if (a_num < b_num) return -1;`
			`if (a_num > b_num) return 1;`
			`*a = a_end;`
			`*b = b_end;`
			`return 0;`
			`}`
Apply new indentation, brace, and whitespace style 2012-11-19 00:30:30 +00:00
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`/// Compare two strings, representing file names, using "natural" ordering. This means that letter`
			`/// case is ignored. It also means that integers in each string are compared based on the decimal`
			`/// value rather than the string representation. It only handles base 10 integers and they can`
			`/// appear anywhere in each string, including multiple integers. This means that a file name like`
			`/// "0xAF0123" is treated as the literal "0xAF" followed by the integer 123.`
			`///`
			`/// The intent is to ensure that file names like "file23" and "file5" are sorted so that the latter`
			`/// appears before the former.`
			`///`
			`/// This does not handle esoterica like Unicode combining characters. Nor does it use collating`
			`/// sequences. Which means that an ASCII "A" will be less than an equivalent character with a higher`
			`/// Unicode code point. In part because doing so is really hard without the help of something like`
			`/// the ICU library. But also because file names might be in a different encoding than is used by`
			`/// the current fish process which results in weird situations. This is basically a best effort`
			`/// implementation that will do the right thing 99.99% of the time.`
			`///`
			`/// Returns: -1 if a < b, 0 if a == b, 1 if a > b.`
			`int wcsfilecmp(const wchar_t *a, const wchar_t *b) {`
Fix the build on FreeBSD with Clang NULL expands to nullptr which cannot be cast to an int. Replace it with 0 in wcsfilecmp. Fixes issue #4136 2017-06-17 15:03:37 +00:00			`CHECK(a, 0);`
			`CHECK(b, 0);`
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`const wchar_t *orig_a = a;`
			`const wchar_t *orig_b = b;`
			`int retval = 0; // assume the strings will be equal`
Apply new indentation, brace, and whitespace style 2012-11-19 00:30:30 +00:00
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`while (*a && *b) {`
			`if (iswdigit(*a) && iswdigit(*b)) {`
			`retval = wcsfilecmp_leading_digits(&a, &b);`
			`// If we know the strings aren't logically equal or we've reached the end of one or both`
			`// strings we can stop iterating over the chars in each string.`
			`if (retval || *a == 0 || *b == 0) break;`
			`}`
Apply new indentation, brace, and whitespace style 2012-11-19 00:30:30 +00:00
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`wint_t al = towlower(*a);`
			`wint_t bl = towlower(*b);`
			`if (al < bl) {`
			`retval = -1;`
			`break;`
			`} else if (al > bl) {`
			`retval = 1;`
			`break;`
			`} else {`
			`a++;`
			`b++;`
			`}`
Apply new indentation, brace, and whitespace style 2012-11-19 00:30:30 +00:00			`}`

fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`if (retval != 0) return retval; // we already know the strings aren't logically equal`
Remove trailing whitespaces and change tabs to spaces 2012-11-18 10:23:22 +00:00
fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`if (*a == 0) {`
			`if (*b == 0) {`
			`// The strings are logically equal. They may or may not be the same length depending on`
			`// whether numbers were present but that doesn't matter. Disambiguate strings that`
			`// differ by letter case or length. We don't bother optimizing the case where the file`
			`// names are literally identical because that won't occur given how this function is`
			`// used. And even if it were to occur (due to being reused in some other context) it`
			`// would be so rare that it isn't worth optimizing for.`
			`retval = wcscmp(orig_a, orig_b);`
			`return retval < 0 ? -1 : retval == 0 ? 0 : 1;`
			`}`
			`return -1; // string a is a prefix of b and b is longer`
Remove trailing whitespaces and change tabs to spaces 2012-11-18 10:23:22 +00:00			`}`

fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`assert(*b == 0);`
			`return 1; // string b is a prefix of a and a is longer`
Initial revision darcs-hash:20050920132639-ac50b-fa3b476891e1f5f67207cf4cc7bf623834cc5edc.gz 2005-09-20 13:26:39 +00:00			`}`

fix `wcsfilecmp()` This started out as a refactoring to eliminate the lint warnings. Adding unit tests revealed the current implementation does not behave as implied. So this is a complete rewrite of the implementation. With the addition of unit tests so that it doesn't break in the future and anyone who thinks this new version behaves wrong can update the unit tests to help ensure we're testing for the correct behavior. Fixes #4027 2017-05-13 03:15:24 +00:00			`/// Return microseconds since the epoch.`
restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-03 22:18:24 +00:00			`long long get_time() {`
Apply new indentation, brace, and whitespace style 2012-11-19 00:30:30 +00:00			`struct timeval time_struct;`
			`gettimeofday(&time_struct, 0);`
restyle remaining modules to match project style For this change I decided to bundle the remaining modules that need to be resytyled because only two were large enough to warrant doing on their own. Reduces lint errors from 225 to 162 (-28%). Line count from 3073 to 2465 (-20%). Another step in resolving issue #2902. 2016-05-03 22:18:24 +00:00			`return 1000000ll * time_struct.tv_sec + time_struct.tv_usec;`
Initial revision darcs-hash:20050920132639-ac50b-fa3b476891e1f5f67207cf4cc7bf623834cc5edc.gz 2005-09-20 13:26:39 +00:00			`}`