Reimplement wbasename and wdirname

Previously wbasename and wdirname wrapped the system-provided basename
and dirname. But these have thread-safety issues and some surprising
error conditions on Mac. Just reimplement these per the OpenGroup spec.

In particular these no longer trigger a null-dereference if the input
exceeds PATH_MAX.

Add some tests too.

This fixes #7837

(cherry picked from commit cf35431af9)
This commit is contained in:
ridiculousfish 2021-03-21 16:00:29 -07:00 committed by David Adam
parent 951fc6b954
commit 8b825bf760
3 changed files with 67 additions and 10 deletions

View file

@ -195,6 +195,16 @@ static void popd() {
env_stack_t::principal().set_pwd_from_getcwd();
}
// Helper to return a string whose length greatly exceeds PATH_MAX.
wcstring get_overlong_path() {
wcstring longpath;
longpath.reserve(PATH_MAX * 2 + 10);
while (longpath.size() <= PATH_MAX * 2) {
longpath += L"/overlong";
}
return longpath;
}
// The odd formulation of these macros is to avoid "multiple unary operator" warnings from oclint
// were we to use the more natural "if (!(e)) err(..." form. We have to do this because the rules
// for the C preprocessor make it practically impossible to embed a comment in the body of a macro.
@ -5336,6 +5346,13 @@ static void test_highlighting() {
{L"# comment", highlight_role_t::comment},
});
// Overlong paths don't crash (#7837).
const wcstring overlong = get_overlong_path();
highlight_tests.push_back({
{L"touch", highlight_role_t::command},
{overlong.c_str(), highlight_role_t::param},
});
highlight_tests.push_back({
{L"a", highlight_role_t::param},
{L"=", highlight_role_t::operat, ns},
@ -6218,6 +6235,11 @@ void test_dirname_basename() {
base.c_str());
}
}
// Ensures strings which greatly exceed PATH_MAX still work (#7837).
wcstring longpath = get_overlong_path();
wcstring longpath_dir = longpath.substr(0, longpath.rfind(L'/'));
do_test(wdirname(longpath) == longpath_dir);
do_test(wbasename(longpath) == L"overlong");
}
static void test_topic_monitor() {

View file

@ -400,16 +400,51 @@ wcstring path_normalize_for_cd(const wcstring &wd, const wcstring &path) {
return result;
}
wcstring wdirname(const wcstring &path) {
std::string tmp = wcs2string(path);
const char *narrow_res = dirname(&tmp[0]);
return str2wcstring(narrow_res);
wcstring wdirname(wcstring path) {
// Do not use system-provided dirname (#7837).
// On Mac it's not thread safe, and will error for paths exceeding PATH_MAX.
// This follows OpenGroup dirname recipe.
// 1: Double-slash stays.
if (path == L"//") return path;
// 2: All slashes => return slash.
if (!path.empty() && path.find_first_not_of(L'/') == wcstring::npos) return L"/";
// 3: Trim trailing slashes.
while (!path.empty() && path.back() == L'/') path.pop_back();
// 4: No slashes left => return period.
size_t last_slash = path.rfind(L'/');
if (last_slash == wcstring::npos) return L".";
// 5: Remove trailing non-slashes.
path.erase(last_slash + 1, wcstring::npos);
// 6: Skip as permitted.
// 7: Remove trailing slashes again.
while (!path.empty() && path.back() == L'/') path.pop_back();
// 8: Empty => return slash.
if (path.empty()) path = L"/";
return path;
}
wcstring wbasename(const wcstring &path) {
std::string tmp = wcs2string(path);
char *narrow_res = basename(&tmp[0]);
return str2wcstring(narrow_res);
wcstring wbasename(wcstring path) {
// This follows OpenGroup basename recipe.
// 1: empty => allowed to return ".". This is what system impls do.
if (path.empty()) return L".";
// 2: Skip as permitted.
// 3: All slashes => return slash.
if (!path.empty() && path.find_first_not_of(L'/') == wcstring::npos) return L"/";
// 4: Remove trailing slashes.
while (!path.empty() && path.back() == L'/') path.pop_back();
// 5: Remove up to and including last slash.
size_t last_slash = path.rfind(L'/');
if (last_slash != wcstring::npos) path.erase(0, last_slash + 1);
return path;
}
// Really init wgettext.

View file

@ -75,10 +75,10 @@ bool wreaddir_resolving(DIR *dir, const std::wstring &dir_path, wcstring &out_na
bool wreaddir_for_dirs(DIR *dir, wcstring *out_name);
/// Wide character version of dirname().
std::wstring wdirname(const std::wstring &path);
std::wstring wdirname(std::wstring path);
/// Wide character version of basename().
std::wstring wbasename(const std::wstring &path);
std::wstring wbasename(std::wstring path);
/// Wide character wrapper around the gettext function. For historic reasons, unlike the real
/// gettext function, wgettext takes care of setting the correct domain, etc. using the textdomain