mirror of
https://github.com/nushell/nushell
synced 2025-01-28 04:45:18 +00:00
Port detect columns
(#892)
This commit is contained in:
parent
95a5e9229a
commit
060a4b3f48
4 changed files with 317 additions and 1 deletions
|
@ -133,6 +133,7 @@ pub fn create_default_context(cwd: impl AsRef<Path>) -> EngineState {
|
||||||
BuildString,
|
BuildString,
|
||||||
Char,
|
Char,
|
||||||
Decode,
|
Decode,
|
||||||
|
DetectColumns,
|
||||||
Format,
|
Format,
|
||||||
Parse,
|
Parse,
|
||||||
Size,
|
Size,
|
||||||
|
|
|
@ -138,7 +138,7 @@ fn rename(
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
for (idx, val) in columns.iter().enumerate() {
|
for (idx, val) in columns.iter().enumerate() {
|
||||||
if idx > cols.len() - 1 {
|
if idx >= cols.len() {
|
||||||
// skip extra new columns names if we already reached the final column
|
// skip extra new columns names if we already reached the final column
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
313
crates/nu-command/src/strings/detect_columns.rs
Normal file
313
crates/nu-command/src/strings/detect_columns.rs
Normal file
|
@ -0,0 +1,313 @@
|
||||||
|
use std::iter::Peekable;
|
||||||
|
use std::str::CharIndices;
|
||||||
|
|
||||||
|
use nu_engine::CallExt;
|
||||||
|
use nu_protocol::ast::Call;
|
||||||
|
use nu_protocol::engine::{Command, EngineState, Stack};
|
||||||
|
use nu_protocol::{
|
||||||
|
Category, IntoInterruptiblePipelineData, PipelineData, ShellError, Signature, Span, Spanned,
|
||||||
|
SyntaxShape, Value,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Peekable stream of `(byte offset, char)` pairs over one line of text; `find_columns`
/// creates it and `baseline` consumes tokens from it.
type Input<'t> = Peekable<CharIndices<'t>>;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct DetectColumns;
|
||||||
|
|
||||||
|
impl Command for DetectColumns {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
"detect columns"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn signature(&self) -> Signature {
|
||||||
|
Signature::build("detect columns")
|
||||||
|
.named(
|
||||||
|
"skip",
|
||||||
|
SyntaxShape::Int,
|
||||||
|
"number of rows to skip before detecting",
|
||||||
|
Some('s'),
|
||||||
|
)
|
||||||
|
.switch("no_headers", "don't detect headers", Some('n'))
|
||||||
|
.category(Category::Strings)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn usage(&self) -> &str {
|
||||||
|
"splits contents across multiple columns via the separator."
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(
|
||||||
|
&self,
|
||||||
|
engine_state: &EngineState,
|
||||||
|
stack: &mut Stack,
|
||||||
|
call: &Call,
|
||||||
|
input: PipelineData,
|
||||||
|
) -> Result<PipelineData, ShellError> {
|
||||||
|
detect_columns(engine_state, stack, call, input)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn detect_columns(
|
||||||
|
engine_state: &EngineState,
|
||||||
|
stack: &mut Stack,
|
||||||
|
call: &Call,
|
||||||
|
input: PipelineData,
|
||||||
|
) -> Result<PipelineData, ShellError> {
|
||||||
|
let name_span = call.head;
|
||||||
|
let num_rows_to_skip: Option<usize> = call.get_flag(engine_state, stack, "skip")?;
|
||||||
|
let noheader = call.has_flag("no_headers");
|
||||||
|
let ctrlc = engine_state.ctrlc.clone();
|
||||||
|
let config = stack.get_config()?;
|
||||||
|
let input = input.collect_string("", &config)?;
|
||||||
|
|
||||||
|
let input: Vec<_> = input
|
||||||
|
.lines()
|
||||||
|
.skip(num_rows_to_skip.unwrap_or_default())
|
||||||
|
.map(|x| x.to_string())
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut input = input.into_iter();
|
||||||
|
let headers = input.next();
|
||||||
|
|
||||||
|
if let Some(orig_headers) = headers {
|
||||||
|
let mut headers = find_columns(&orig_headers);
|
||||||
|
|
||||||
|
if noheader {
|
||||||
|
for header in headers.iter_mut().enumerate() {
|
||||||
|
header.1.item = format!("Column{}", header.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((if noheader {
|
||||||
|
vec![orig_headers].into_iter().chain(input)
|
||||||
|
} else {
|
||||||
|
vec![].into_iter().chain(input)
|
||||||
|
})
|
||||||
|
.map(move |x| {
|
||||||
|
let row = find_columns(&x);
|
||||||
|
|
||||||
|
let mut cols = vec![];
|
||||||
|
let mut vals = vec![];
|
||||||
|
|
||||||
|
if headers.len() == row.len() {
|
||||||
|
for (header, val) in headers.iter().zip(row.iter()) {
|
||||||
|
cols.push(header.item.clone());
|
||||||
|
vals.push(Value::String {
|
||||||
|
val: val.item.clone(),
|
||||||
|
span: name_span,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let mut pre_output = vec![];
|
||||||
|
|
||||||
|
// column counts don't line up, so see if we can figure out why
|
||||||
|
for cell in row {
|
||||||
|
for header in &headers {
|
||||||
|
if cell.span.start <= header.span.end && cell.span.end > header.span.start {
|
||||||
|
pre_output.push((
|
||||||
|
header.item.to_string(),
|
||||||
|
Value::string(&cell.item, name_span),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for header in &headers {
|
||||||
|
let mut found = false;
|
||||||
|
for pre_o in &pre_output {
|
||||||
|
if pre_o.0 == header.item {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
pre_output.push((header.item.to_string(), Value::nothing(name_span)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for header in &headers {
|
||||||
|
for pre_o in &pre_output {
|
||||||
|
if pre_o.0 == header.item {
|
||||||
|
cols.push(header.item.clone());
|
||||||
|
vals.push(pre_o.1.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Value::Record {
|
||||||
|
cols,
|
||||||
|
vals,
|
||||||
|
span: name_span,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.into_pipeline_data(ctrlc))
|
||||||
|
} else {
|
||||||
|
Ok(PipelineData::new(name_span))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn find_columns(input: &str) -> Vec<Spanned<String>> {
|
||||||
|
let mut chars = input.char_indices().peekable();
|
||||||
|
let mut output = vec![];
|
||||||
|
|
||||||
|
while let Some((_, c)) = chars.peek() {
|
||||||
|
if c.is_whitespace() {
|
||||||
|
// If the next character is non-newline whitespace, skip it.
|
||||||
|
|
||||||
|
let _ = chars.next();
|
||||||
|
} else {
|
||||||
|
// Otherwise, try to consume an unclassified token.
|
||||||
|
|
||||||
|
let result = baseline(&mut chars);
|
||||||
|
|
||||||
|
output.push(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
output
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Kind of paired delimiter that `baseline` is currently nested inside.
#[derive(Copy, Clone)]
enum BlockKind {
    /// Opened by `(`, closed by `)`.
    Paren,
    /// Opened by `{`, closed by `}`.
    CurlyBracket,
    /// Opened by `[`, closed by `]`.
    SquareBracket,
}
|
||||||
|
|
||||||
|
fn baseline(src: &mut Input) -> Spanned<String> {
|
||||||
|
let mut token_contents = String::new();
|
||||||
|
|
||||||
|
let start_offset = if let Some((pos, _)) = src.peek() {
|
||||||
|
*pos
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
|
|
||||||
|
// This variable tracks the starting character of a string literal, so that
|
||||||
|
// we remain inside the string literal lexer mode until we encounter the
|
||||||
|
// closing quote.
|
||||||
|
let mut quote_start: Option<char> = None;
|
||||||
|
|
||||||
|
// This Vec tracks paired delimiters
|
||||||
|
let mut block_level: Vec<BlockKind> = vec![];
|
||||||
|
|
||||||
|
// A baseline token is terminated if it's not nested inside of a paired
|
||||||
|
// delimiter and the next character is one of: `|`, `;`, `#` or any
|
||||||
|
// whitespace.
|
||||||
|
fn is_termination(block_level: &[BlockKind], c: char) -> bool {
|
||||||
|
block_level.is_empty() && (c.is_whitespace())
|
||||||
|
}
|
||||||
|
|
||||||
|
// The process of slurping up a baseline token repeats:
|
||||||
|
//
|
||||||
|
// - String literal, which begins with `'`, `"` or `\``, and continues until
|
||||||
|
// the same character is encountered again.
|
||||||
|
// - Delimiter pair, which begins with `[`, `(`, or `{`, and continues until
|
||||||
|
// the matching closing delimiter is found, skipping comments and string
|
||||||
|
// literals.
|
||||||
|
// - When not nested inside of a delimiter pair, when a terminating
|
||||||
|
// character (whitespace, `|`, `;` or `#`) is encountered, the baseline
|
||||||
|
// token is done.
|
||||||
|
// - Otherwise, accumulate the character into the current baseline token.
|
||||||
|
while let Some((_, c)) = src.peek() {
|
||||||
|
let c = *c;
|
||||||
|
|
||||||
|
if quote_start.is_some() {
|
||||||
|
// If we encountered the closing quote character for the current
|
||||||
|
// string, we're done with the current string.
|
||||||
|
if Some(c) == quote_start {
|
||||||
|
quote_start = None;
|
||||||
|
}
|
||||||
|
} else if c == '\n' {
|
||||||
|
if is_termination(&block_level, c) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else if c == '\'' || c == '"' || c == '`' {
|
||||||
|
// We encountered the opening quote of a string literal.
|
||||||
|
quote_start = Some(c);
|
||||||
|
} else if c == '[' {
|
||||||
|
// We encountered an opening `[` delimiter.
|
||||||
|
block_level.push(BlockKind::SquareBracket);
|
||||||
|
} else if c == ']' {
|
||||||
|
// We encountered a closing `]` delimiter. Pop off the opening `[`
|
||||||
|
// delimiter.
|
||||||
|
if let Some(BlockKind::SquareBracket) = block_level.last() {
|
||||||
|
let _ = block_level.pop();
|
||||||
|
}
|
||||||
|
} else if c == '{' {
|
||||||
|
// We encountered an opening `{` delimiter.
|
||||||
|
block_level.push(BlockKind::CurlyBracket);
|
||||||
|
} else if c == '}' {
|
||||||
|
// We encountered a closing `}` delimiter. Pop off the opening `{`.
|
||||||
|
if let Some(BlockKind::CurlyBracket) = block_level.last() {
|
||||||
|
let _ = block_level.pop();
|
||||||
|
}
|
||||||
|
} else if c == '(' {
|
||||||
|
// We enceountered an opening `(` delimiter.
|
||||||
|
block_level.push(BlockKind::Paren);
|
||||||
|
} else if c == ')' {
|
||||||
|
// We encountered a closing `)` delimiter. Pop off the opening `(`.
|
||||||
|
if let Some(BlockKind::Paren) = block_level.last() {
|
||||||
|
let _ = block_level.pop();
|
||||||
|
}
|
||||||
|
} else if is_termination(&block_level, c) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, accumulate the character into the current token.
|
||||||
|
token_contents.push(c);
|
||||||
|
|
||||||
|
// Consume the character.
|
||||||
|
let _ = src.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
let span = Span::new(start_offset, start_offset + token_contents.len());
|
||||||
|
|
||||||
|
// If there is still unclosed opening delimiters, close them and add
|
||||||
|
// synthetic closing characters to the accumulated token.
|
||||||
|
if block_level.last().is_some() {
|
||||||
|
// let delim: char = (*block).closing();
|
||||||
|
// let cause = ParseError::unexpected_eof(delim.to_string(), span);
|
||||||
|
|
||||||
|
// while let Some(bk) = block_level.pop() {
|
||||||
|
// token_contents.push(bk.closing());
|
||||||
|
// }
|
||||||
|
|
||||||
|
return Spanned {
|
||||||
|
item: token_contents,
|
||||||
|
span,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if quote_start.is_some() {
|
||||||
|
// The non-lite parse trims quotes on both sides, so we add the expected quote so that
|
||||||
|
// anyone wanting to consume this partial parse (e.g., completions) will be able to get
|
||||||
|
// correct information from the non-lite parse.
|
||||||
|
// token_contents.push(delimiter);
|
||||||
|
|
||||||
|
// return (
|
||||||
|
// token_contents.spanned(span),
|
||||||
|
// Some(ParseError::unexpected_eof(delimiter.to_string(), span)),
|
||||||
|
// );
|
||||||
|
return Spanned {
|
||||||
|
item: token_contents,
|
||||||
|
span,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Spanned {
|
||||||
|
item: token_contents,
|
||||||
|
span,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::DetectColumns;

    /// Hands `DetectColumns` to the crate-level `test_examples` helper.
    #[test]
    fn test_examples() {
        crate::test_examples(DetectColumns)
    }
}
|
|
@ -1,6 +1,7 @@
|
||||||
mod build_string;
|
mod build_string;
|
||||||
mod char_;
|
mod char_;
|
||||||
mod decode;
|
mod decode;
|
||||||
|
mod detect_columns;
|
||||||
mod format;
|
mod format;
|
||||||
mod parse;
|
mod parse;
|
||||||
mod size;
|
mod size;
|
||||||
|
@ -10,6 +11,7 @@ mod str_;
|
||||||
pub use build_string::BuildString;
|
pub use build_string::BuildString;
|
||||||
pub use char_::Char;
|
pub use char_::Char;
|
||||||
pub use decode::*;
|
pub use decode::*;
|
||||||
|
pub use detect_columns::*;
|
||||||
pub use format::*;
|
pub use format::*;
|
||||||
pub use parse::*;
|
pub use parse::*;
|
||||||
pub use size::Size;
|
pub use size::Size;
|
||||||
|
|
Loading…
Reference in a new issue