Add from csv and from tsv (#320)

This commit is contained in:
JT 2021-11-10 09:17:37 +13:00 committed by GitHub
parent 0f516a0830
commit bb1740d733
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 277 additions and 14 deletions

24
Cargo.lock generated
View file

@ -121,6 +121,7 @@ dependencies = [
"lazy_static", "lazy_static",
"memchr", "memchr",
"regex-automata", "regex-automata",
"serde",
] ]
[[package]] [[package]]
@ -326,6 +327,28 @@ dependencies = [
"winapi", "winapi",
] ]
[[package]]
name = "csv"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1"
dependencies = [
"bstr",
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90"
dependencies = [
"memchr",
]
[[package]] [[package]]
name = "ctor" name = "ctor"
version = "0.1.21" version = "0.1.21"
@ -719,6 +742,7 @@ dependencies = [
"chrono", "chrono",
"chrono-humanize", "chrono-humanize",
"chrono-tz", "chrono-tz",
"csv",
"dialoguer", "dialoguer",
"glob", "glob",
"lscolors", "lscolors",

View file

@ -18,6 +18,7 @@ trash = { version = "1.3.0", optional = true }
unicode-segmentation = "1.8.0" unicode-segmentation = "1.8.0"
# Potential dependencies for extras # Potential dependencies for extras
csv = "1.1.3"
glob = "0.3.0" glob = "0.3.0"
Inflector = "0.11" Inflector = "0.11"
thiserror = "1.0.29" thiserror = "1.0.29"

View file

@ -45,7 +45,9 @@ pub fn create_default_context() -> EngineState {
For, For,
Format, Format,
From, From,
FromCsv,
FromJson, FromJson,
FromTsv,
Get, Get,
Griddle, Griddle,
Help, Help,

View file

@ -0,0 +1,113 @@
use super::delimited::from_delimited_data;
use nu_engine::CallExt;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{Example, PipelineData, ShellError, Signature, SyntaxShape, Value};
/// Marker type for the `from csv` command; it carries no state of its own.
#[derive(Clone)]
pub struct FromCsv;
impl Command for FromCsv {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "from csv"
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Parse text as .csv and create table."
    }

    /// Declares the optional `--separator`/`-s` string flag and the
    /// `--noheaders`/`-n` switch.
    fn signature(&self) -> Signature {
        let with_separator = Signature::build("from csv").named(
            "separator",
            SyntaxShape::String,
            "a character to separate columns, defaults to ','",
            Some('s'),
        );

        with_separator.switch(
            "noheaders",
            "don't treat the first row as column names",
            Some('n'),
        )
    }

    /// Forwards the call to `from_csv`, which does the flag handling.
    fn run(
        &self,
        engine_state: &EngineState,
        stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<nu_protocol::PipelineData, ShellError> {
        from_csv(engine_state, stack, call, input)
    }

    /// Usage examples; none of them carries an expected result.
    fn examples(&self) -> Vec<Example> {
        // (description, example) pairs, in display order.
        let usages = [
            (
                "Convert comma-separated data to a table",
                "open data.txt | from csv",
            ),
            (
                "Convert comma-separated data to a table, ignoring headers",
                "open data.txt | from csv --noheaders",
            ),
            (
                "Convert comma-separated data to a table, ignoring headers",
                "open data.txt | from csv -n",
            ),
            (
                "Convert semicolon-separated data to a table",
                "open data.txt | from csv --separator ';'",
            ),
        ];

        usages
            .iter()
            .map(|&(description, example)| Example {
                description,
                example,
                result: None,
            })
            .collect()
    }
}
/// Reads the `noheaders` and `separator` flags from `call` and parses `input`
/// as delimited text.
///
/// The separator defaults to `,`. The two-character text `\t` (backslash, `t`)
/// selects a tab; any other value must be exactly one character.
///
/// # Errors
///
/// Returns `ShellError::MissingParameter` when the separator string is not
/// exactly one character, and propagates errors from flag evaluation and from
/// `from_delimited_data`.
fn from_csv(
    engine_state: &EngineState,
    stack: &mut Stack,
    call: &Call,
    input: PipelineData,
) -> Result<PipelineData, ShellError> {
    let head = call.head;
    let skip_header_row = call.has_flag("noheaders");
    let separator_flag: Option<Value> = call.get_flag(engine_state, stack, "separator")?;

    let delimiter = match separator_flag {
        Some(Value::String { val, span }) => match val.as_str() {
            // Lets users spell a tab without embedding a literal tab character.
            r"\t" => '\t',
            text => {
                let mut chars = text.chars();
                // Exactly one char: the first exists and nothing follows it.
                match (chars.next(), chars.next()) {
                    (Some(only), None) => only,
                    _ => {
                        return Err(ShellError::MissingParameter(
                            "single character separator".into(),
                            span,
                        ))
                    }
                }
            }
        },
        // Flag absent (or not a string value): use the CSV default.
        _ => ',',
    };

    from_delimited_data(skip_header_row, delimiter, input, head)
}
#[cfg(test)]
mod test {
    use super::FromCsv;

    /// Runs the crate's shared example checker against `FromCsv`.
    #[test]
    fn test_examples() {
        // Called by full path: importing the helper at module level would
        // clash with this test function's own name.
        crate::test_examples(FromCsv)
    }
}

View file

@ -0,0 +1,61 @@
use csv::ReaderBuilder;
use nu_protocol::{IntoPipelineData, PipelineData, ShellError, Span, Value};
/// Parses `s` as delimiter-separated text and returns a `Value::List` of
/// `Value::Record`s, one per data row.
///
/// * `noheaders` - when `true`, the first row is treated as data and columns
///   are named `Column1`, `Column2`, ...; otherwise the first row supplies the
///   column names.
/// * `separator` - the field delimiter; must be an ASCII character because the
///   csv reader takes its delimiter as a single byte.
/// * `span` - attached to every value produced.
///
/// Each field becomes an `Int` if it parses as `i64`, else a `Float` if it
/// parses as `f64`, else a `String`.
///
/// # Errors
///
/// Returns a `csv::Error` when `separator` is not ASCII or when the reader
/// fails on the headers or on any record.
fn from_delimited_string_to_value(
    s: String,
    noheaders: bool,
    separator: char,
    span: Span,
) -> Result<Value, csv::Error> {
    // `separator as u8` would silently truncate a wider char to an unrelated
    // byte, so reject anything that does not fit in one ASCII byte up front.
    if !separator.is_ascii() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            format!(
                "separator '{}' is not a single-byte ASCII character",
                separator
            ),
        )
        .into());
    }

    let mut reader = ReaderBuilder::new()
        .has_headers(!noheaders)
        .delimiter(separator as u8)
        .from_reader(s.as_bytes());

    let headers: Vec<String> = if noheaders {
        // Only the width of the first row is used here; the names are synthetic.
        (1..=reader.headers()?.len())
            .map(|i| format!("Column{}", i))
            .collect()
    } else {
        reader.headers()?.iter().map(String::from).collect()
    };

    let mut rows = vec![];
    for row in reader.records() {
        let row = row?;
        let vals = row
            .iter()
            .map(|field| {
                // Try the integer parse first so "1" stays an Int, not a Float.
                if let Ok(i) = field.parse::<i64>() {
                    Value::Int { val: i, span }
                } else if let Ok(f) = field.parse::<f64>() {
                    Value::Float { val: f, span }
                } else {
                    Value::String {
                        val: field.into(),
                        span,
                    }
                }
            })
            .collect();

        rows.push(Value::Record {
            cols: headers.clone(),
            vals,
            span,
        });
    }

    Ok(Value::List { vals: rows, span })
}
/// Collects `input` into one string (joined with an empty separator) and
/// parses it as text delimited by `sep`.
///
/// # Errors
///
/// A parse failure is reported as `ShellError::DelimiterError`, carrying the
/// underlying error's message and the `name` span.
pub fn from_delimited_data(
    noheaders: bool,
    sep: char,
    input: PipelineData,
    name: Span,
) -> Result<PipelineData, ShellError> {
    let text = input.collect_string("");

    match from_delimited_string_to_value(text, noheaders, sep, name) {
        Ok(table) => Ok(table.into_pipeline_data()),
        Err(err) => Err(ShellError::DelimiterError(err.to_string(), name)),
    }
}

View file

@ -77,7 +77,7 @@ impl Command for FromJson {
input: PipelineData, input: PipelineData,
) -> Result<nu_protocol::PipelineData, ShellError> { ) -> Result<nu_protocol::PipelineData, ShellError> {
let span = call.head; let span = call.head;
let mut string_input = input.collect_string(); let mut string_input = input.collect_string("");
string_input.push('\n'); string_input.push('\n');
// TODO: turn this into a structured underline of the nu_json error // TODO: turn this into a structured underline of the nu_json error

View file

@ -1,5 +1,10 @@
mod command; mod command;
mod csv;
mod delimited;
mod json; mod json;
mod tsv;
pub use self::csv::FromCsv;
pub use command::From; pub use command::From;
pub use json::FromJson; pub use json::FromJson;
pub use tsv::FromTsv;

View file

@ -0,0 +1,56 @@
use super::delimited::from_delimited_data;
use nu_protocol::ast::Call;
use nu_protocol::engine::{Command, EngineState, Stack};
use nu_protocol::{PipelineData, ShellError, Signature};
/// Marker type for the `from tsv` command; it carries no state of its own.
#[derive(Clone)]
pub struct FromTsv;
impl Command for FromTsv {
    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "from tsv"
    }

    /// Declares the `--noheaders`/`-n` switch.
    ///
    /// The signature is built under the same name that `name()` returns.
    fn signature(&self) -> Signature {
        Signature::build("from tsv").switch(
            "noheaders",
            "don't treat the first row as column names",
            Some('n'),
        )
    }

    /// One-line description of the command.
    fn usage(&self) -> &str {
        "Parse text as .tsv and create table."
    }

    /// Forwards the call to `from_tsv`; the engine state and stack are not
    /// needed because the only flag is a plain switch.
    fn run(
        &self,
        _engine_state: &EngineState,
        _stack: &mut Stack,
        call: &Call,
        input: PipelineData,
    ) -> Result<nu_protocol::PipelineData, ShellError> {
        from_tsv(call, input)
    }
}
/// Parses `input` as tab-separated text, honoring the `noheaders` switch on
/// `call`.
fn from_tsv(call: &Call, input: PipelineData) -> Result<PipelineData, ShellError> {
    // TSV is the shared delimited parser with a fixed tab delimiter.
    from_delimited_data(call.has_flag("noheaders"), '\t', input, call.head)
}
#[cfg(test)]
mod test {
    use super::FromTsv;

    /// Runs the crate's shared example checker against `FromTsv`.
    #[test]
    fn test_examples() {
        // Called by full path: importing the helper at module level would
        // clash with this test function's own name.
        crate::test_examples(FromTsv)
    }
}

View file

@ -415,7 +415,7 @@ pub fn eval_subexpression(
// to be used later // to be used later
// FIXME: the trimming of the end probably needs to live in a better place // FIXME: the trimming of the end probably needs to live in a better place
let mut s = input.collect_string(); let mut s = input.collect_string("");
if s.ends_with('\n') { if s.ends_with('\n') {
s.pop(); s.pop();
} }

View file

@ -51,10 +51,10 @@ impl PipelineData {
} }
} }
pub fn collect_string(self) -> String { pub fn collect_string(self, separator: &str) -> String {
match self { match self {
PipelineData::Value(v) => v.into_string("\n"), PipelineData::Value(v) => v.into_string(separator),
PipelineData::Stream(s) => s.into_string("\n"), PipelineData::Stream(s) => s.into_string(separator),
} }
} }

View file

@ -60,6 +60,10 @@ pub enum ShellError {
right_span: Span, right_span: Span,
}, },
#[error("Delimiter error")]
#[diagnostic(code(nu::shell::delimiter_error), url(docsrs))]
DelimiterError(String, #[label("{0}")] Span),
#[error("Incompatible parameters.")] #[error("Incompatible parameters.")]
#[diagnostic(code(nu::shell::incompatible_parameters), url(docsrs))] #[diagnostic(code(nu::shell::incompatible_parameters), url(docsrs))]
IncompatibleParametersSingle(String, #[label = "{0}"] Span), IncompatibleParametersSingle(String, #[label = "{0}"] Span),

View file

@ -20,12 +20,9 @@ pub struct ValueStream {
impl ValueStream { impl ValueStream {
pub fn into_string(self, separator: &str) -> String { pub fn into_string(self, separator: &str) -> String {
format!(
"[{}]",
self.map(|x: Value| x.into_string(", ")) self.map(|x: Value| x.into_string(", "))
.collect::<Vec<String>>() .collect::<Vec<String>>()
.join(separator) .join(separator)
)
} }
pub fn from_stream( pub fn from_stream(

View file

@ -133,7 +133,7 @@ fn main() -> Result<()> {
PipelineData::new(Span::unknown()), PipelineData::new(Span::unknown()),
) { ) {
Ok(pipeline_data) => { Ok(pipeline_data) => {
println!("{}", pipeline_data.collect_string()); println!("{}", pipeline_data.collect_string("\n"));
} }
Err(err) => { Err(err) => {
let working_set = StateWorkingSet::new(&engine_state); let working_set = StateWorkingSet::new(&engine_state);
@ -273,7 +273,7 @@ fn print_value(value: Value, engine_state: &EngineState) -> Result<(), ShellErro
&Call::new(), &Call::new(),
value.into_pipeline_data(), value.into_pipeline_data(),
)?; )?;
table.collect_string() table.collect_string("\n")
} }
None => value.into_string(", "), None => value.into_string(", "),
}; };
@ -323,7 +323,7 @@ fn update_prompt<'prompt>(
&block, &block,
PipelineData::new(Span::unknown()), PipelineData::new(Span::unknown()),
) { ) {
Ok(pipeline_data) => pipeline_data.collect_string(), Ok(pipeline_data) => pipeline_data.collect_string(""),
Err(err) => { Err(err) => {
let working_set = StateWorkingSet::new(engine_state); let working_set = StateWorkingSet::new(engine_state);
report_error(&working_set, &err); report_error(&working_set, &err);