Fix quoting in to nuon and refactor quoting functions (#14180)

<!--
if this PR closes one or more issues, you can automatically link the PR
with
them by using one of the [*linking
keywords*](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword),
e.g.
- this PR should close #xxxx
- fixes #xxxx

you can also mention related issues, PRs or discussions!
-->

# Description
<!--
Thank you for improving Nushell. Please, check our [contributing
guide](../CONTRIBUTING.md) and talk to the core team before making major
changes.

Description of your pull request goes here. **Provide examples and/or
screenshots** if your changes affect the user experience.
-->

This PR fixes the quoting and escaping of column names in `to nuon`.
Before the PR, column names with quotes inside them would get quoted,
but not escaped:

```nushell
> { 'a"b': 2 } | to nuon
{ "a"b": 2 }

> { 'a"b': 2 } | to nuon | from nuon
Error:   × error when loading nuon text
   ╭─[entry #1:1:27]
 1 │ { "a\"b": 2 } | to nuon | from nuon
   ·                           ────┬────
   ·                               ╰── could not load nuon text
   ╰────

Error:   × error when parsing nuon text
   ╭─[entry #1:1:27]
 1 │ { "a\"b": 2 } | to nuon | from nuon
   ·                           ────┬────
   ·                               ╰── could not parse nuon text
   ╰────

Error:   × error when parsing
   ╭────
 1 │ {"a"b": 2}
   ·          ┬
   ·          ╰── Unexpected end of code.
   ╰────

> [['a"b']; [2] [3]] | to nuon
[["a"b"]; [2], [3]]

> [['a"b']; [2] [3]] | to nuon | from nuon
Error:   × error when loading nuon text
   ╭─[entry #1:1:32]
 1 │ [['a"b']; [2] [3]] | to nuon | from nuon
   ·                                ────┬────
   ·                                    ╰── could not load nuon text
   ╰────

Error:   × error when parsing nuon text
   ╭─[entry #1:1:32]
 1 │ [['a"b']; [2] [3]] | to nuon | from nuon
   ·                                ────┬────
   ·                                    ╰── could not parse nuon text
   ╰────

Error:   × error when parsing
   ╭────
 1 │ [["a"b"]; [2], [3]]
   ·                   ┬
   ·                   ╰── Unexpected end of code.
   ╰────
```

After this PR, the quote is escaped properly:

```nushell
> { 'a"b': 2 } | to nuon
{ "a\"b": 2 }

> { 'a"b': 2 } | to nuon | from nuon
╭─────┬───╮
│ a"b │ 2 │
╰─────┴───╯

> [['a"b']; [2] [3]] | to nuon
[["a\"b"]; [2], [3]]

> [['a"b']; [2] [3]] | to nuon | from nuon
╭─────╮
│ a"b │
├─────┤
│   2 │
│   3 │
╰─────╯
```

The cause of the issue was that `to nuon` simply wrapped column names in
`'"'` instead of calling `escape_quote_string`.

As part of this change, I also moved the functions related to quoting
(`needs_quoting` and `escape_quote_string`) into `nu-utils`, since
previously they were defined in very ad-hoc places (and, in the case of
`escape_quote_string`, it was defined multiple times with the same
body!).

# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

`to nuon` now properly escapes quotes in column names.

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used` to
check that you're using the standard code style
- `cargo test --workspace` to check that all tests pass (on Windows make
sure to [enable developer
mode](https://learn.microsoft.com/en-us/windows/apps/get-started/developer-mode-features-and-debugging))
- `cargo run -- -c "use toolkit.nu; toolkit test stdlib"` to run the
tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

All tests pass, including workspace and stdlib tests.

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->
This commit is contained in:
Alex Ionescu 2024-10-29 13:43:26 +01:00 committed by GitHub
parent 3ec76af96e
commit 79ea70d4ec
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 76 additions and 86 deletions

6
Cargo.lock generated
View file

@ -3317,6 +3317,7 @@ dependencies = [
"nu-path", "nu-path",
"nu-plugin-engine", "nu-plugin-engine",
"nu-protocol", "nu-protocol",
"nu-utils",
"rstest", "rstest",
"serde_json", "serde_json",
] ]
@ -3522,10 +3523,12 @@ name = "nu-utils"
version = "0.99.2" version = "0.99.2"
dependencies = [ dependencies = [
"crossterm_winapi", "crossterm_winapi",
"fancy-regex",
"log", "log",
"lscolors", "lscolors",
"nix 0.29.0", "nix 0.29.0",
"num-format", "num-format",
"once_cell",
"serde", "serde",
"strip-ansi-escapes", "strip-ansi-escapes",
"sys-locale", "sys-locale",
@ -3755,11 +3758,10 @@ name = "nuon"
version = "0.99.2" version = "0.99.2"
dependencies = [ dependencies = [
"chrono", "chrono",
"fancy-regex",
"nu-engine", "nu-engine",
"nu-parser", "nu-parser",
"nu-protocol", "nu-protocol",
"once_cell", "nu-utils",
] ]
[[package]] [[package]]

View file

@ -1,6 +1,6 @@
use nu_cmd_base::hook::eval_hook; use nu_cmd_base::hook::eval_hook;
use nu_engine::{eval_block, eval_block_with_early_return}; use nu_engine::{eval_block, eval_block_with_early_return};
use nu_parser::{escape_quote_string, lex, parse, unescape_unquote_string, Token, TokenContents}; use nu_parser::{lex, parse, unescape_unquote_string, Token, TokenContents};
use nu_protocol::{ use nu_protocol::{
cli_error::report_compile_error, cli_error::report_compile_error,
debugger::WithoutDebug, debugger::WithoutDebug,
@ -10,7 +10,7 @@ use nu_protocol::{
}; };
#[cfg(windows)] #[cfg(windows)]
use nu_utils::enable_vt_processing; use nu_utils::enable_vt_processing;
use nu_utils::perf; use nu_utils::{escape_quote_string, perf};
use std::path::Path; use std::path::Path;
// This will collect environment variables from std::env and adds them to a stack. // This will collect environment variables from std::env and adds them to a stack.

View file

@ -19,6 +19,7 @@ nu-engine = { path = "../nu-engine", version = "0.99.2" }
nu-path = { path = "../nu-path", version = "0.99.2" } nu-path = { path = "../nu-path", version = "0.99.2" }
nu-plugin-engine = { path = "../nu-plugin-engine", optional = true, version = "0.99.2" } nu-plugin-engine = { path = "../nu-plugin-engine", optional = true, version = "0.99.2" }
nu-protocol = { path = "../nu-protocol", version = "0.99.2" } nu-protocol = { path = "../nu-protocol", version = "0.99.2" }
nu-utils = { path = "../nu-utils", version = "0.99.2" }
bytesize = { workspace = true } bytesize = { workspace = true }
chrono = { default-features = false, features = ['std'], workspace = true } chrono = { default-features = false, features = ['std'], workspace = true }

View file

@ -1,3 +1,5 @@
use nu_utils::escape_quote_string;
fn string_should_be_quoted(input: &str) -> bool { fn string_should_be_quoted(input: &str) -> bool {
input.starts_with('$') input.starts_with('$')
|| input || input
@ -5,21 +7,6 @@ fn string_should_be_quoted(input: &str) -> bool {
.any(|c| c == ' ' || c == '(' || c == '\'' || c == '`' || c == '"' || c == '\\') .any(|c| c == ' ' || c == '(' || c == '\'' || c == '`' || c == '"' || c == '\\')
} }
pub fn escape_quote_string(input: &str) -> String {
let mut output = String::with_capacity(input.len() + 2);
output.push('"');
for c in input.chars() {
if c == '"' || c == '\\' {
output.push('\\');
}
output.push(c);
}
output.push('"');
output
}
// Escape rules: // Escape rules:
// input argument is not a flag, does not start with $ and doesn't contain special characters, it is passed as it is (foo -> foo) // input argument is not a flag, does not start with $ and doesn't contain special characters, it is passed as it is (foo -> foo)
// input argument is not a flag and either starts with $ or contains special characters, quotes are added, " and \ are escaped (two \words -> "two \\words") // input argument is not a flag and either starts with $ or contains special characters, quotes are added, " and \ are escaped (two \words -> "two \\words")

View file

@ -11,7 +11,7 @@ mod parse_shape_specs;
mod parser; mod parser;
mod type_check; mod type_check;
pub use deparse::{escape_for_script_arg, escape_quote_string}; pub use deparse::escape_for_script_arg;
pub use flatten::{ pub use flatten::{
flatten_block, flatten_expression, flatten_pipeline, flatten_pipeline_element, FlatShape, flatten_block, flatten_expression, flatten_pipeline, flatten_pipeline_element, FlatShape,
}; };

View file

@ -235,6 +235,7 @@ macro_rules! nu_with_plugins {
use crate::{Outcome, NATIVE_PATH_ENV_VAR}; use crate::{Outcome, NATIVE_PATH_ENV_VAR};
use nu_path::{AbsolutePath, AbsolutePathBuf, Path, PathBuf}; use nu_path::{AbsolutePath, AbsolutePathBuf, Path, PathBuf};
use nu_utils::escape_quote_string;
use std::{ use std::{
ffi::OsStr, ffi::OsStr,
process::{Command, Stdio}, process::{Command, Stdio},
@ -421,21 +422,6 @@ where
Outcome::new(out, err.into_owned(), output.status) Outcome::new(out, err.into_owned(), output.status)
} }
fn escape_quote_string(input: &str) -> String {
let mut output = String::with_capacity(input.len() + 2);
output.push('"');
for c in input.chars() {
if c == '"' || c == '\\' {
output.push('\\');
}
output.push(c);
}
output.push('"');
output
}
fn with_exe(name: &str) -> String { fn with_exe(name: &str) -> String {
#[cfg(windows)] #[cfg(windows)]
{ {

View file

@ -17,9 +17,11 @@ bench = false
bench = false bench = false
[dependencies] [dependencies]
fancy-regex = { workspace = true }
lscolors = { workspace = true, default-features = false, features = ["nu-ansi-term"] } lscolors = { workspace = true, default-features = false, features = ["nu-ansi-term"] }
log = { workspace = true } log = { workspace = true }
num-format = { workspace = true } num-format = { workspace = true }
once_cell = { workspace = true }
strip-ansi-escapes = { workspace = true } strip-ansi-escapes = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
sys-locale = "0.3" sys-locale = "0.3"

View file

@ -4,6 +4,7 @@ mod deansi;
pub mod emoji; pub mod emoji;
pub mod filesystem; pub mod filesystem;
pub mod locale; pub mod locale;
mod quoting;
mod shared_cow; mod shared_cow;
pub mod utils; pub mod utils;
@ -18,6 +19,7 @@ pub use deansi::{
strip_ansi_likely, strip_ansi_string_likely, strip_ansi_string_unlikely, strip_ansi_unlikely, strip_ansi_likely, strip_ansi_string_likely, strip_ansi_string_unlikely, strip_ansi_unlikely,
}; };
pub use emoji::contains_emoji; pub use emoji::contains_emoji;
pub use quoting::{escape_quote_string, needs_quoting};
pub use shared_cow::SharedCow; pub use shared_cow::SharedCow;
#[cfg(unix)] #[cfg(unix)]

View file

@ -0,0 +1,43 @@
use fancy_regex::Regex;
use once_cell::sync::Lazy;
// This hits, in order:
// • Any character of []:`{}#'";()|$,
// • Any digit (\d)
// • Any whitespace (\s)
// • Case-insensitive sign-insensitive float "keywords" inf, infinity and nan.
static NEEDS_QUOTING_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"[\[\]:`\{\}#'";\(\)\|\$,\d\s]|(?i)^[+\-]?(inf(inity)?|nan)$"#)
.expect("internal error: NEEDS_QUOTING_REGEX didn't compile")
});
pub fn needs_quoting(string: &str) -> bool {
if string.is_empty() {
return true;
}
// These are case-sensitive keywords
match string {
// `true`/`false`/`null` are active keywords in JSON and NUON
// `&&` is denied by the nu parser for diagnostics reasons
// (https://github.com/nushell/nushell/pull/7241)
"true" | "false" | "null" | "&&" => return true,
_ => (),
};
// All other cases are handled here
NEEDS_QUOTING_REGEX.is_match(string).unwrap_or(false)
}
pub fn escape_quote_string(string: &str) -> String {
let mut output = String::with_capacity(string.len() + 2);
output.push('"');
for c in string.chars() {
if c == '"' || c == '\\' {
output.push('\\');
}
output.push(c);
}
output.push('"');
output
}

View file

@ -13,8 +13,7 @@ version = "0.99.2"
nu-parser = { path = "../nu-parser", version = "0.99.2" } nu-parser = { path = "../nu-parser", version = "0.99.2" }
nu-protocol = { path = "../nu-protocol", version = "0.99.2" } nu-protocol = { path = "../nu-protocol", version = "0.99.2" }
nu-engine = { path = "../nu-engine", version = "0.99.2" } nu-engine = { path = "../nu-engine", version = "0.99.2" }
once_cell = { workspace = true } nu-utils = { path = "../nu-utils", version = "0.99.2" }
fancy-regex = { workspace = true }
[dev-dependencies] [dev-dependencies]
chrono = { workspace = true } chrono = { workspace = true }

View file

@ -1,10 +1,8 @@
use core::fmt::Write; use core::fmt::Write;
use fancy_regex::Regex;
use once_cell::sync::Lazy;
use nu_engine::get_columns; use nu_engine::get_columns;
use nu_parser::escape_quote_string;
use nu_protocol::{Range, ShellError, Span, Value}; use nu_protocol::{Range, ShellError, Span, Value};
use nu_utils::{escape_quote_string, needs_quoting};
use std::ops::Bound; use std::ops::Bound;
@ -129,11 +127,12 @@ fn value_to_string(
let headers: Vec<String> = headers let headers: Vec<String> = headers
.iter() .iter()
.map(|string| { .map(|string| {
if needs_quotes(string) { let string = if needs_quoting(string) {
format!("{idt}\"{string}\"") &escape_quote_string(string)
} else { } else {
string
};
format!("{idt}{string}") format!("{idt}{string}")
}
}) })
.collect(); .collect();
let headers_output = headers.join(&format!(",{sep}{nl}{idt_pt}")); let headers_output = headers.join(&format!(",{sep}{nl}{idt_pt}"));
@ -199,19 +198,15 @@ fn value_to_string(
Value::Record { val, .. } => { Value::Record { val, .. } => {
let mut collection = vec![]; let mut collection = vec![];
for (col, val) in &**val { for (col, val) in &**val {
collection.push(if needs_quotes(col) { let col = if needs_quoting(col) {
format!( &escape_quote_string(col)
"{idt_po}\"{}\": {}",
col,
value_to_string_without_quotes(val, span, depth + 1, indent)?
)
} else { } else {
format!( col
"{idt_po}{}: {}", };
col, collection.push(format!(
"{idt_po}{col}: {}",
value_to_string_without_quotes(val, span, depth + 1, indent)? value_to_string_without_quotes(val, span, depth + 1, indent)?
) ));
});
} }
Ok(format!( Ok(format!(
"{{{nl}{}{nl}{idt}}}", "{{{nl}{}{nl}{idt}}}",
@ -247,7 +242,7 @@ fn value_to_string_without_quotes(
) -> Result<String, ShellError> { ) -> Result<String, ShellError> {
match v { match v {
Value::String { val, .. } => Ok({ Value::String { val, .. } => Ok({
if needs_quotes(val) { if needs_quoting(val) {
escape_quote_string(val) escape_quote_string(val)
} else { } else {
val.clone() val.clone()
@ -256,30 +251,3 @@ fn value_to_string_without_quotes(
_ => value_to_string(v, span, depth, indent), _ => value_to_string(v, span, depth, indent),
} }
} }
// This hits, in order:
// • Any character of []:`{}#'";()|$,
// • Any digit (\d)
// • Any whitespace (\s)
// • Case-insensitive sign-insensitive float "keywords" inf, infinity and nan.
static NEEDS_QUOTES_REGEX: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"[\[\]:`\{\}#'";\(\)\|\$,\d\s]|(?i)^[+\-]?(inf(inity)?|nan)$"#)
.expect("internal error: NEEDS_QUOTES_REGEX didn't compile")
});
fn needs_quotes(string: &str) -> bool {
if string.is_empty() {
return true;
}
// These are case-sensitive keywords
match string {
// `true`/`false`/`null` are active keywords in JSON and NUON
// `&&` is denied by the nu parser for diagnostics reasons
// (https://github.com/nushell/nushell/pull/7241)
// TODO: remove the extra check in the nuon codepath
"true" | "false" | "null" | "&&" => return true,
_ => (),
};
// All other cases are handled here
NEEDS_QUOTES_REGEX.is_match(string).unwrap_or(false)
}

View file

@ -1,11 +1,11 @@
use nu_engine::{command_prelude::*, get_full_help}; use nu_engine::{command_prelude::*, get_full_help};
use nu_parser::{escape_for_script_arg, escape_quote_string, parse}; use nu_parser::{escape_for_script_arg, parse};
use nu_protocol::{ use nu_protocol::{
ast::{Expr, Expression}, ast::{Expr, Expression},
engine::StateWorkingSet, engine::StateWorkingSet,
report_parse_error, report_parse_error,
}; };
use nu_utils::stdout_write_all_and_flush; use nu_utils::{escape_quote_string, stdout_write_all_and_flush};
pub(crate) fn gather_commandline_args() -> (Vec<String>, String, Vec<String>) { pub(crate) fn gather_commandline_args() -> (Vec<String>, String, Vec<String>) {
// Would be nice if we had a way to parse this. The first flags we see will be going to nushell // Would be nice if we had a way to parse this. The first flags we see will be going to nushell