mirror of
https://github.com/nushell/nushell
synced 2024-12-29 06:23:11 +00:00
Refactor lines
command (#11685)
# Description This PR uses `str::lines` to simplify the `lines` command (and one other section of code). This has two main benefits: 1. We no longer need to use regex to split on lines, as `str::lines` splits on `\r\n` or `\n`. 2. We no longer need to handle blank empty lines at the end. E.g., `str::lines` results in `["text"]` for both `"test\n"` and `"text"`. These changes give a slight boost to performance for the following benchmarks: 1. lines of `Value::String`: ```nushell let data = open Cargo.lock 1..10000 | each { $data | timeit { lines } } | math avg ``` current main: 392µs this PR: 270µs 2. lines of external stream: ```nushell 1..10000 | each { open Cargo.lock | timeit { lines } } | math avg ``` current main: 794µs this PR: 489µs
This commit is contained in:
parent
c371d1a535
commit
cf9813cbf8
2 changed files with 59 additions and 117 deletions
|
@ -1,14 +1,12 @@
|
||||||
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
use nu_engine::CallExt;
|
use nu_engine::CallExt;
|
||||||
use nu_protocol::ast::Call;
|
use nu_protocol::ast::Call;
|
||||||
use nu_protocol::engine::{Command, EngineState, Stack};
|
use nu_protocol::engine::{Command, EngineState, Stack};
|
||||||
use nu_protocol::{
|
use nu_protocol::{
|
||||||
Category, Example, IntoInterruptiblePipelineData, PipelineData, RawStream, ShellError,
|
Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData, RawStream,
|
||||||
Signature, Span, Type, Value,
|
ShellError, Signature, Span, Type, Value,
|
||||||
};
|
};
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
// regex can be replaced with fancy-regex once it supports `split()`
|
|
||||||
// https://github.com/fancy-regex/fancy-regex/issues/104
|
|
||||||
use regex::Regex;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Lines;
|
pub struct Lines;
|
||||||
|
@ -39,38 +37,24 @@ impl Command for Lines {
|
||||||
let ctrlc = engine_state.ctrlc.clone();
|
let ctrlc = engine_state.ctrlc.clone();
|
||||||
let skip_empty = call.has_flag(engine_state, stack, "skip-empty")?;
|
let skip_empty = call.has_flag(engine_state, stack, "skip-empty")?;
|
||||||
|
|
||||||
// match \r\n or \n
|
|
||||||
static LINE_BREAK_REGEX: Lazy<Regex> =
|
|
||||||
Lazy::new(|| Regex::new(r"\r\n|\n").expect("unable to compile regex"));
|
|
||||||
let span = input.span().unwrap_or(call.head);
|
let span = input.span().unwrap_or(call.head);
|
||||||
match input {
|
match input {
|
||||||
#[allow(clippy::needless_collect)]
|
|
||||||
// Collect is needed because the string may not live long enough for
|
|
||||||
// the Rc structure to continue using it. If split could take ownership
|
|
||||||
// of the split values, then this wouldn't be needed
|
|
||||||
PipelineData::Value(Value::String { val, .. }, ..) => {
|
PipelineData::Value(Value::String { val, .. }, ..) => {
|
||||||
let mut lines = LINE_BREAK_REGEX
|
let lines = if skip_empty {
|
||||||
.split(&val)
|
val.lines()
|
||||||
.map(|s| s.to_string())
|
.filter_map(|s| {
|
||||||
.collect::<Vec<String>>();
|
if s.trim().is_empty() {
|
||||||
|
|
||||||
// if the last one is empty, remove it, as it was just
|
|
||||||
// a newline at the end of the input we got
|
|
||||||
if let Some(last) = lines.last() {
|
|
||||||
if last.is_empty() {
|
|
||||||
lines.pop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let iter = lines.into_iter().filter_map(move |s| {
|
|
||||||
if skip_empty && s.trim().is_empty() {
|
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
Some(Value::string(s, span))
|
Some(Value::string(s, span))
|
||||||
}
|
}
|
||||||
});
|
})
|
||||||
|
.collect()
|
||||||
|
} else {
|
||||||
|
val.lines().map(|s| Value::string(s, span)).collect()
|
||||||
|
};
|
||||||
|
|
||||||
Ok(iter.into_pipeline_data(engine_state.ctrlc.clone()))
|
Ok(Value::list(lines, span).into_pipeline_data())
|
||||||
}
|
}
|
||||||
PipelineData::Empty => Ok(PipelineData::Empty),
|
PipelineData::Empty => Ok(PipelineData::Empty),
|
||||||
PipelineData::ListStream(stream, ..) => {
|
PipelineData::ListStream(stream, ..) => {
|
||||||
|
@ -79,26 +63,17 @@ impl Command for Lines {
|
||||||
.filter_map(move |value| {
|
.filter_map(move |value| {
|
||||||
let span = value.span();
|
let span = value.span();
|
||||||
if let Value::String { val, .. } = value {
|
if let Value::String { val, .. } = value {
|
||||||
let mut lines = LINE_BREAK_REGEX
|
Some(
|
||||||
.split(&val)
|
val.lines()
|
||||||
.filter_map(|s| {
|
.filter_map(|s| {
|
||||||
if skip_empty && s.trim().is_empty() {
|
if skip_empty && s.trim().is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
Some(s.to_string())
|
Some(Value::string(s, span))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>(),
|
||||||
|
)
|
||||||
// if the last one is empty, remove it, as it was just
|
|
||||||
// a newline at the end of the input we got
|
|
||||||
if let Some(last) = lines.last() {
|
|
||||||
if last.is_empty() {
|
|
||||||
lines.pop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Some(lines.into_iter().map(move |x| Value::string(x, span)))
|
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
@ -124,11 +99,7 @@ impl Command for Lines {
|
||||||
stdout: Some(stream),
|
stdout: Some(stream),
|
||||||
..
|
..
|
||||||
} => Ok(RawStreamLinesAdapter::new(stream, head, skip_empty)
|
} => Ok(RawStreamLinesAdapter::new(stream, head, skip_empty)
|
||||||
.enumerate()
|
.map(move |x| x.unwrap_or_else(|err| Value::error(err, head)))
|
||||||
.map(move |(_idx, x)| match x {
|
|
||||||
Ok(x) => x,
|
|
||||||
Err(err) => Value::error(err, head),
|
|
||||||
})
|
|
||||||
.into_pipeline_data(ctrlc)),
|
.into_pipeline_data(ctrlc)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -152,38 +123,30 @@ struct RawStreamLinesAdapter {
|
||||||
skip_empty: bool,
|
skip_empty: bool,
|
||||||
span: Span,
|
span: Span,
|
||||||
incomplete_line: String,
|
incomplete_line: String,
|
||||||
queue: Vec<String>,
|
queue: VecDeque<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for RawStreamLinesAdapter {
|
impl Iterator for RawStreamLinesAdapter {
|
||||||
type Item = Result<Value, ShellError>;
|
type Item = Result<Value, ShellError>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
static LINE_BREAK_REGEX: Lazy<Regex> =
|
|
||||||
Lazy::new(|| Regex::new(r"\r\n|\n").expect("unable to compile regex"));
|
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
if !self.queue.is_empty() {
|
if let Some(s) = self.queue.pop_front() {
|
||||||
let s = self.queue.remove(0usize);
|
|
||||||
|
|
||||||
if self.skip_empty && s.trim().is_empty() {
|
if self.skip_empty && s.trim().is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
return Some(Ok(Value::string(s, self.span)));
|
return Some(Ok(Value::string(s, self.span)));
|
||||||
} else {
|
} else {
|
||||||
// inner is complete, feed out remaining state
|
// inner is complete, feed out remaining state
|
||||||
if self.inner_complete {
|
if self.inner_complete {
|
||||||
if !self.incomplete_line.is_empty() {
|
return if self.incomplete_line.is_empty() {
|
||||||
let r = Some(Ok(Value::string(
|
None
|
||||||
self.incomplete_line.to_string(),
|
} else {
|
||||||
|
Some(Ok(Value::string(
|
||||||
|
std::mem::take(&mut self.incomplete_line),
|
||||||
self.span,
|
self.span,
|
||||||
)));
|
)))
|
||||||
self.incomplete_line = String::new();
|
};
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
return None;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// pull more data from inner
|
// pull more data from inner
|
||||||
|
@ -196,36 +159,29 @@ impl Iterator for RawStreamLinesAdapter {
|
||||||
Value::String { val, .. } => {
|
Value::String { val, .. } => {
|
||||||
self.span = span;
|
self.span = span;
|
||||||
|
|
||||||
let mut lines = LINE_BREAK_REGEX
|
let mut lines = val.lines();
|
||||||
.split(&val)
|
|
||||||
.map(|s| s.to_string())
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
// handle incomplete line from previous
|
// handle incomplete line from previous
|
||||||
if !self.incomplete_line.is_empty() {
|
if !self.incomplete_line.is_empty() {
|
||||||
if let Some(first) = lines.first() {
|
if let Some(first) = lines.next() {
|
||||||
let new_incomplete_line =
|
self.incomplete_line.push_str(first);
|
||||||
self.incomplete_line.to_string() + first.as_str();
|
self.queue.push_back(std::mem::take(
|
||||||
lines.splice(0..1, vec![new_incomplete_line]);
|
&mut self.incomplete_line,
|
||||||
self.incomplete_line = String::new();
|
));
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// store incomplete line from current
|
|
||||||
if let Some(last) = lines.last() {
|
|
||||||
if last.is_empty() {
|
|
||||||
// we ended on a line ending
|
|
||||||
lines.pop();
|
|
||||||
} else {
|
|
||||||
// incomplete line, save for next time
|
|
||||||
if let Some(s) = lines.pop() {
|
|
||||||
self.incomplete_line = s;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// save completed lines
|
// save completed lines
|
||||||
self.queue.append(&mut lines);
|
self.queue.extend(lines.map(String::from));
|
||||||
|
|
||||||
|
if !val.ends_with('\n') {
|
||||||
|
// incomplete line, save for next time
|
||||||
|
// if `val` and `incomplete_line` were empty,
|
||||||
|
// then pop will return none
|
||||||
|
if let Some(s) = self.queue.pop_back() {
|
||||||
|
self.incomplete_line = s;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Propagate errors by explicitly matching them before the final case.
|
// Propagate errors by explicitly matching them before the final case.
|
||||||
Value::Error { error, .. } => return Some(Err(*error)),
|
Value::Error { error, .. } => return Some(Err(*error)),
|
||||||
|
@ -256,7 +212,7 @@ impl RawStreamLinesAdapter {
|
||||||
span,
|
span,
|
||||||
skip_empty,
|
skip_empty,
|
||||||
incomplete_line: String::new(),
|
incomplete_line: String::new(),
|
||||||
queue: Vec::<String>::new(),
|
queue: VecDeque::new(),
|
||||||
inner_complete: false,
|
inner_complete: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,6 @@
|
||||||
|
|
||||||
use std::fmt::{Display, LowerExp};
|
use std::fmt::{Display, LowerExp};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::io::{BufRead, BufReader};
|
|
||||||
use std::num::FpCategory;
|
use std::num::FpCategory;
|
||||||
|
|
||||||
use super::error::{Error, ErrorCode, Result};
|
use super::error::{Error, ErrorCode, Result};
|
||||||
|
@ -1034,20 +1033,7 @@ where
|
||||||
T: ser::Serialize,
|
T: ser::Serialize,
|
||||||
{
|
{
|
||||||
let vec = to_vec(value)?;
|
let vec = to_vec(value)?;
|
||||||
let string = remove_json_whitespace(vec);
|
let string = String::from_utf8(vec)?;
|
||||||
Ok(string)
|
let output = string.lines().map(str::trim).collect();
|
||||||
}
|
Ok(output)
|
||||||
|
|
||||||
fn remove_json_whitespace(v: Vec<u8>) -> String {
|
|
||||||
let reader = BufReader::new(&v[..]);
|
|
||||||
let mut output = String::new();
|
|
||||||
for line in reader.lines() {
|
|
||||||
match line {
|
|
||||||
Ok(line) => output.push_str(line.trim().trim_end()),
|
|
||||||
_ => {
|
|
||||||
eprintln!("Error removing JSON whitespace");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue