mirror of
https://github.com/nushell/nushell
synced 2025-01-13 13:49:21 +00:00
Strict JSON parsing (#11592)
# Description Adds the `--strict` flag for `from json` which will try to parse text while following the exact JSON specification (e.g., no comments or trailing commas allowed). Fixes issue #11548.
This commit is contained in:
parent
6530403ff8
commit
4e0a65c822
5 changed files with 190 additions and 48 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -5104,6 +5104,7 @@ version = "1.0.112"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d1bd37ce2324cf3bf85e5a25f96eb4baf0d5aa6eba43e7ae8958870c4ec48ed"
|
||||
dependencies = [
|
||||
"indexmap",
|
||||
"itoa",
|
||||
"ryu",
|
||||
"serde",
|
||||
|
|
|
@ -75,7 +75,7 @@ roxmltree = "0.18"
|
|||
rusqlite = { version = "0.29", features = ["bundled", "backup", "chrono"], optional = true }
|
||||
same-file = "1.0"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
serde_json = { version = "1.0", features = ["preserve_order"] }
|
||||
serde_urlencoded = "0.7"
|
||||
serde_yaml = "0.9"
|
||||
sha2 = "0.10"
|
||||
|
|
|
@ -22,6 +22,7 @@ impl Command for FromJson {
|
|||
Signature::build("from json")
|
||||
.input_output_types(vec![(Type::String, Type::Any)])
|
||||
.switch("objects", "treat each line as a separate value", Some('o'))
|
||||
.switch("strict", "follow the json specification exactly", Some('s'))
|
||||
.category(Category::Formats)
|
||||
}
|
||||
|
||||
|
@ -42,6 +43,14 @@ impl Command for FromJson {
|
|||
"b" => Value::test_list(vec![Value::test_int(1), Value::test_int(2)]),
|
||||
})),
|
||||
},
|
||||
Example {
|
||||
example: r#"'{ "a": 1, "b": 2 }' | from json -s"#,
|
||||
description: "Parse json strictly which will error on comments and trailing commas",
|
||||
result: Some(Value::test_record(record! {
|
||||
"a" => Value::test_int(1),
|
||||
"b" => Value::test_int(2),
|
||||
})),
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
|
@ -59,52 +68,61 @@ impl Command for FromJson {
|
|||
return Ok(PipelineData::new_with_metadata(metadata, span));
|
||||
}
|
||||
|
||||
let strict = call.has_flag(engine_state, stack, "strict")?;
|
||||
|
||||
// TODO: turn this into a structured underline of the nu_json error
|
||||
if call.has_flag(engine_state, stack, "objects")? {
|
||||
let converted_lines: Vec<Value> = string_input
|
||||
.lines()
|
||||
.filter_map(move |x| {
|
||||
if x.trim() == "" {
|
||||
None
|
||||
} else {
|
||||
match convert_string_to_value(x.to_string(), span) {
|
||||
Ok(v) => Some(v),
|
||||
Err(error) => Some(Value::error(error, span)),
|
||||
}
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
let lines = string_input.lines().filter(|line| !line.trim().is_empty());
|
||||
|
||||
let converted_lines: Vec<_> = if strict {
|
||||
lines
|
||||
.map(|line| {
|
||||
convert_string_to_value_strict(line, span)
|
||||
.unwrap_or_else(|err| Value::error(err, span))
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
lines
|
||||
.map(|line| {
|
||||
convert_string_to_value(line, span)
|
||||
.unwrap_or_else(|err| Value::error(err, span))
|
||||
})
|
||||
.collect()
|
||||
};
|
||||
|
||||
Ok(converted_lines
|
||||
.into_pipeline_data_with_metadata(metadata, engine_state.ctrlc.clone()))
|
||||
} else if strict {
|
||||
Ok(convert_string_to_value_strict(&string_input, span)?
|
||||
.into_pipeline_data_with_metadata(metadata))
|
||||
} else {
|
||||
Ok(convert_string_to_value(string_input, span)?
|
||||
Ok(convert_string_to_value(&string_input, span)?
|
||||
.into_pipeline_data_with_metadata(metadata))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value {
|
||||
fn convert_nujson_to_value(value: nu_json::Value, span: Span) -> Value {
|
||||
match value {
|
||||
nu_json::Value::Array(array) => {
|
||||
let v: Vec<Value> = array
|
||||
.iter()
|
||||
nu_json::Value::Array(array) => Value::list(
|
||||
array
|
||||
.into_iter()
|
||||
.map(|x| convert_nujson_to_value(x, span))
|
||||
.collect();
|
||||
|
||||
Value::list(v, span)
|
||||
}
|
||||
nu_json::Value::Bool(b) => Value::bool(*b, span),
|
||||
nu_json::Value::F64(f) => Value::float(*f, span),
|
||||
nu_json::Value::I64(i) => Value::int(*i, span),
|
||||
.collect(),
|
||||
span,
|
||||
),
|
||||
nu_json::Value::Bool(b) => Value::bool(b, span),
|
||||
nu_json::Value::F64(f) => Value::float(f, span),
|
||||
nu_json::Value::I64(i) => Value::int(i, span),
|
||||
nu_json::Value::Null => Value::nothing(span),
|
||||
nu_json::Value::Object(k) => Value::record(
|
||||
k.iter()
|
||||
.map(|(k, v)| (k.clone(), convert_nujson_to_value(v, span)))
|
||||
k.into_iter()
|
||||
.map(|(k, v)| (k, convert_nujson_to_value(v, span)))
|
||||
.collect(),
|
||||
span,
|
||||
),
|
||||
nu_json::Value::U64(u) => {
|
||||
if *u > i64::MAX as u64 {
|
||||
if u > i64::MAX as u64 {
|
||||
Value::error(
|
||||
ShellError::CantConvert {
|
||||
to_type: "i64 sized integer".into(),
|
||||
|
@ -115,22 +133,22 @@ fn convert_nujson_to_value(value: &nu_json::Value, span: Span) -> Value {
|
|||
span,
|
||||
)
|
||||
} else {
|
||||
Value::int(*u as i64, span)
|
||||
Value::int(u as i64, span)
|
||||
}
|
||||
}
|
||||
nu_json::Value::String(s) => Value::string(s.clone(), span),
|
||||
nu_json::Value::String(s) => Value::string(s, span),
|
||||
}
|
||||
}
|
||||
|
||||
// Converts row+column to a Span, assuming bytes (1-based rows)
|
||||
fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
|
||||
let mut cur_row = 1;
|
||||
let mut cur_col = 0;
|
||||
let mut cur_col = 1;
|
||||
|
||||
for (offset, curr_byte) in contents.bytes().enumerate() {
|
||||
if curr_byte == b'\n' {
|
||||
cur_row += 1;
|
||||
cur_col = 0;
|
||||
cur_col = 1;
|
||||
}
|
||||
if cur_row >= row && cur_col >= col {
|
||||
return Span::new(offset, offset);
|
||||
|
@ -142,22 +160,21 @@ fn convert_row_column_to_span(row: usize, col: usize, contents: &str) -> Span {
|
|||
Span::new(contents.len(), contents.len())
|
||||
}
|
||||
|
||||
fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, ShellError> {
|
||||
let result: Result<nu_json::Value, nu_json::Error> = nu_json::from_str(&string_input);
|
||||
match result {
|
||||
Ok(value) => Ok(convert_nujson_to_value(&value, span)),
|
||||
fn convert_string_to_value(string_input: &str, span: Span) -> Result<Value, ShellError> {
|
||||
match nu_json::from_str(string_input) {
|
||||
Ok(value) => Ok(convert_nujson_to_value(value, span)),
|
||||
|
||||
Err(x) => match x {
|
||||
nu_json::Error::Syntax(_, row, col) => {
|
||||
let label = x.to_string();
|
||||
let label_span = convert_row_column_to_span(row, col, &string_input);
|
||||
let label_span = convert_row_column_to_span(row, col, string_input);
|
||||
Err(ShellError::GenericError {
|
||||
error: "Error while parsing JSON text".into(),
|
||||
msg: "error parsing JSON text".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![ShellError::OutsideSpannedLabeledError {
|
||||
src: string_input,
|
||||
src: string_input.into(),
|
||||
error: "Error while parsing JSON text".into(),
|
||||
msg: label,
|
||||
span: label_span,
|
||||
|
@ -174,6 +191,35 @@ fn convert_string_to_value(string_input: String, span: Span) -> Result<Value, Sh
|
|||
}
|
||||
}
|
||||
|
||||
fn convert_string_to_value_strict(string_input: &str, span: Span) -> Result<Value, ShellError> {
|
||||
match serde_json::from_str(string_input) {
|
||||
Ok(value) => Ok(convert_nujson_to_value(value, span)),
|
||||
Err(err) => Err(if err.is_syntax() {
|
||||
let label = err.to_string();
|
||||
let label_span = convert_row_column_to_span(err.line(), err.column(), string_input);
|
||||
ShellError::GenericError {
|
||||
error: "Error while parsing JSON text".into(),
|
||||
msg: "error parsing JSON text".into(),
|
||||
span: Some(span),
|
||||
help: None,
|
||||
inner: vec![ShellError::OutsideSpannedLabeledError {
|
||||
src: string_input.into(),
|
||||
error: "Error while parsing JSON text".into(),
|
||||
msg: label,
|
||||
span: label_span,
|
||||
}],
|
||||
}
|
||||
} else {
|
||||
ShellError::CantConvert {
|
||||
to_type: format!("structured json data ({err})"),
|
||||
from_type: "string".into(),
|
||||
span,
|
||||
help: None,
|
||||
}
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
|
@ -43,6 +43,32 @@ fn from_json_text_to_table() {
|
|||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_json_text_to_table_strict() {
|
||||
Playground::setup("filter_from_json_test_1_strict", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"katz.txt",
|
||||
r#"
|
||||
{
|
||||
"katz": [
|
||||
{"name": "Yehuda", "rusty_luck": 1},
|
||||
{"name": "JT", "rusty_luck": 1},
|
||||
{"name": "Andres", "rusty_luck": 1},
|
||||
{"name":"GorbyPuff", "rusty_luck": 1}
|
||||
]
|
||||
}
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(),
|
||||
"open katz.txt | from json -s | get katz | get rusty_luck | length "
|
||||
);
|
||||
|
||||
assert_eq!(actual.out, "4");
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_json_text_recognizing_objects_independently_to_table() {
|
||||
Playground::setup("filter_from_json_test_2", |dirs, sandbox| {
|
||||
|
@ -70,6 +96,33 @@ fn from_json_text_recognizing_objects_independently_to_table() {
|
|||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_json_text_recognizing_objects_independently_to_table_strict() {
|
||||
Playground::setup("filter_from_json_test_2_strict", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"katz.txt",
|
||||
r#"
|
||||
{"name": "Yehuda", "rusty_luck": 1}
|
||||
{"name": "JT", "rusty_luck": 1}
|
||||
{"name": "Andres", "rusty_luck": 1}
|
||||
{"name":"GorbyPuff", "rusty_luck": 3}
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
r#"
|
||||
open katz.txt
|
||||
| from json -o -s
|
||||
| where name == "GorbyPuff"
|
||||
| get rusty_luck.0
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "3");
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn table_to_json_text() {
|
||||
Playground::setup("filter_to_json_test", |dirs, sandbox| {
|
||||
|
@ -99,6 +152,35 @@ fn table_to_json_text() {
|
|||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn table_to_json_text_strict() {
|
||||
Playground::setup("filter_to_json_test_strict", |dirs, sandbox| {
|
||||
sandbox.with_files(vec![FileWithContentToBeTrimmed(
|
||||
"sample.txt",
|
||||
r#"
|
||||
JonAndrehudaTZ,3
|
||||
GorbyPuff,100
|
||||
"#,
|
||||
)]);
|
||||
|
||||
let actual = nu!(
|
||||
cwd: dirs.test(), pipeline(
|
||||
r#"
|
||||
open sample.txt
|
||||
| lines
|
||||
| split column "," name luck
|
||||
| select name
|
||||
| to json
|
||||
| from json -s
|
||||
| get 0
|
||||
| get name
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "JonAndrehudaTZ");
|
||||
})
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn top_level_values_from_json() {
|
||||
for (value, type_name) in [("null", "nothing"), ("true", "bool"), ("false", "bool")] {
|
||||
|
@ -109,6 +191,28 @@ fn top_level_values_from_json() {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn top_level_values_from_json_strict() {
|
||||
for (value, type_name) in [("null", "nothing"), ("true", "bool"), ("false", "bool")] {
|
||||
let actual = nu!(r#""{}" | from json -s | to json"#, value);
|
||||
assert_eq!(actual.out, value);
|
||||
let actual = nu!(r#""{}" | from json -s | describe"#, value);
|
||||
assert_eq!(actual.out, type_name);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn strict_parsing_fails_on_comment() {
|
||||
let actual = nu!(r#"'{ "a": 1, /* comment */ "b": 2 }' | from json -s"#);
|
||||
assert!(actual.err.contains("error parsing JSON text"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn strict_parsing_fails_on_trailing_comma() {
|
||||
let actual = nu!(r#"'{ "a": 1, "b": 2, }' | from json -s"#);
|
||||
assert!(actual.err.contains("error parsing JSON text"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ranges_to_json_as_array() {
|
||||
let value = r#"[ 1, 2, 3]"#;
|
||||
|
|
|
@ -26,15 +26,6 @@ pub struct Deserializer<Iter: Iterator<Item = u8>> {
|
|||
state: State,
|
||||
}
|
||||
|
||||
// macro_rules! try_or_invalid {
|
||||
// ($self_:expr, $e:expr) => {
|
||||
// match $e {
|
||||
// Some(v) => v,
|
||||
// None => { return Err($self_.error(ErrorCode::InvalidNumber)); }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
impl<Iter> Deserializer<Iter>
|
||||
where
|
||||
Iter: Iterator<Item = u8>,
|
||||
|
|
Loading…
Reference in a new issue