mirror of
https://github.com/nushell/nushell
synced 2024-12-27 05:23:11 +00:00
Refactor lines
command (#11685)
# Description This PR uses `str::lines` to simplify the `lines` command (and one other section of code). This has two main benefits: 1. We no longer need to use regex to split on lines, as `str::lines` splits on `\r\n` or `\n`. 2. We no longer need to handle blank empty lines at the end. E.g., `str::lines` results in `["text"]` for both `"test\n"` and `"text"`. These changes give a slight boost to performance for the following benchmarks: 1. lines of `Value::String`: ```nushell let data = open Cargo.lock 1..10000 | each { $data | timeit { lines } } | math avg ``` current main: 392µs this PR: 270µs 2. lines of external stream: ```nushell 1..10000 | each { open Cargo.lock | timeit { lines } } | math avg ``` current main: 794µs this PR: 489µs
This commit is contained in:
parent
c371d1a535
commit
cf9813cbf8
2 changed files with 59 additions and 117 deletions
|
@ -1,14 +1,12 @@
|
|||
use std::collections::VecDeque;
|
||||
|
||||
use nu_engine::CallExt;
|
||||
use nu_protocol::ast::Call;
|
||||
use nu_protocol::engine::{Command, EngineState, Stack};
|
||||
use nu_protocol::{
|
||||
Category, Example, IntoInterruptiblePipelineData, PipelineData, RawStream, ShellError,
|
||||
Signature, Span, Type, Value,
|
||||
Category, Example, IntoInterruptiblePipelineData, IntoPipelineData, PipelineData, RawStream,
|
||||
ShellError, Signature, Span, Type, Value,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
// regex can be replaced with fancy-regex once it supports `split()`
|
||||
// https://github.com/fancy-regex/fancy-regex/issues/104
|
||||
use regex::Regex;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Lines;
|
||||
|
@ -39,38 +37,24 @@ impl Command for Lines {
|
|||
let ctrlc = engine_state.ctrlc.clone();
|
||||
let skip_empty = call.has_flag(engine_state, stack, "skip-empty")?;
|
||||
|
||||
// match \r\n or \n
|
||||
static LINE_BREAK_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"\r\n|\n").expect("unable to compile regex"));
|
||||
let span = input.span().unwrap_or(call.head);
|
||||
match input {
|
||||
#[allow(clippy::needless_collect)]
|
||||
// Collect is needed because the string may not live long enough for
|
||||
// the Rc structure to continue using it. If split could take ownership
|
||||
// of the split values, then this wouldn't be needed
|
||||
PipelineData::Value(Value::String { val, .. }, ..) => {
|
||||
let mut lines = LINE_BREAK_REGEX
|
||||
.split(&val)
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
let lines = if skip_empty {
|
||||
val.lines()
|
||||
.filter_map(|s| {
|
||||
if s.trim().is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Value::string(s, span))
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
val.lines().map(|s| Value::string(s, span)).collect()
|
||||
};
|
||||
|
||||
// if the last one is empty, remove it, as it was just
|
||||
// a newline at the end of the input we got
|
||||
if let Some(last) = lines.last() {
|
||||
if last.is_empty() {
|
||||
lines.pop();
|
||||
}
|
||||
}
|
||||
|
||||
let iter = lines.into_iter().filter_map(move |s| {
|
||||
if skip_empty && s.trim().is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Value::string(s, span))
|
||||
}
|
||||
});
|
||||
|
||||
Ok(iter.into_pipeline_data(engine_state.ctrlc.clone()))
|
||||
Ok(Value::list(lines, span).into_pipeline_data())
|
||||
}
|
||||
PipelineData::Empty => Ok(PipelineData::Empty),
|
||||
PipelineData::ListStream(stream, ..) => {
|
||||
|
@ -79,26 +63,17 @@ impl Command for Lines {
|
|||
.filter_map(move |value| {
|
||||
let span = value.span();
|
||||
if let Value::String { val, .. } = value {
|
||||
let mut lines = LINE_BREAK_REGEX
|
||||
.split(&val)
|
||||
.filter_map(|s| {
|
||||
if skip_empty && s.trim().is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(s.to_string())
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// if the last one is empty, remove it, as it was just
|
||||
// a newline at the end of the input we got
|
||||
if let Some(last) = lines.last() {
|
||||
if last.is_empty() {
|
||||
lines.pop();
|
||||
}
|
||||
}
|
||||
|
||||
Some(lines.into_iter().map(move |x| Value::string(x, span)))
|
||||
Some(
|
||||
val.lines()
|
||||
.filter_map(|s| {
|
||||
if skip_empty && s.trim().is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Value::string(s, span))
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
@ -124,11 +99,7 @@ impl Command for Lines {
|
|||
stdout: Some(stream),
|
||||
..
|
||||
} => Ok(RawStreamLinesAdapter::new(stream, head, skip_empty)
|
||||
.enumerate()
|
||||
.map(move |(_idx, x)| match x {
|
||||
Ok(x) => x,
|
||||
Err(err) => Value::error(err, head),
|
||||
})
|
||||
.map(move |x| x.unwrap_or_else(|err| Value::error(err, head)))
|
||||
.into_pipeline_data(ctrlc)),
|
||||
}
|
||||
}
|
||||
|
@ -152,38 +123,30 @@ struct RawStreamLinesAdapter {
|
|||
skip_empty: bool,
|
||||
span: Span,
|
||||
incomplete_line: String,
|
||||
queue: Vec<String>,
|
||||
queue: VecDeque<String>,
|
||||
}
|
||||
|
||||
impl Iterator for RawStreamLinesAdapter {
|
||||
type Item = Result<Value, ShellError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
static LINE_BREAK_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"\r\n|\n").expect("unable to compile regex"));
|
||||
|
||||
loop {
|
||||
if !self.queue.is_empty() {
|
||||
let s = self.queue.remove(0usize);
|
||||
|
||||
if let Some(s) = self.queue.pop_front() {
|
||||
if self.skip_empty && s.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
return Some(Ok(Value::string(s, self.span)));
|
||||
} else {
|
||||
// inner is complete, feed out remaining state
|
||||
if self.inner_complete {
|
||||
if !self.incomplete_line.is_empty() {
|
||||
let r = Some(Ok(Value::string(
|
||||
self.incomplete_line.to_string(),
|
||||
return if self.incomplete_line.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(Ok(Value::string(
|
||||
std::mem::take(&mut self.incomplete_line),
|
||||
self.span,
|
||||
)));
|
||||
self.incomplete_line = String::new();
|
||||
return r;
|
||||
}
|
||||
|
||||
return None;
|
||||
)))
|
||||
};
|
||||
}
|
||||
|
||||
// pull more data from inner
|
||||
|
@ -196,36 +159,29 @@ impl Iterator for RawStreamLinesAdapter {
|
|||
Value::String { val, .. } => {
|
||||
self.span = span;
|
||||
|
||||
let mut lines = LINE_BREAK_REGEX
|
||||
.split(&val)
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<_>>();
|
||||
let mut lines = val.lines();
|
||||
|
||||
// handle incomplete line from previous
|
||||
if !self.incomplete_line.is_empty() {
|
||||
if let Some(first) = lines.first() {
|
||||
let new_incomplete_line =
|
||||
self.incomplete_line.to_string() + first.as_str();
|
||||
lines.splice(0..1, vec![new_incomplete_line]);
|
||||
self.incomplete_line = String::new();
|
||||
}
|
||||
}
|
||||
|
||||
// store incomplete line from current
|
||||
if let Some(last) = lines.last() {
|
||||
if last.is_empty() {
|
||||
// we ended on a line ending
|
||||
lines.pop();
|
||||
} else {
|
||||
// incomplete line, save for next time
|
||||
if let Some(s) = lines.pop() {
|
||||
self.incomplete_line = s;
|
||||
}
|
||||
if let Some(first) = lines.next() {
|
||||
self.incomplete_line.push_str(first);
|
||||
self.queue.push_back(std::mem::take(
|
||||
&mut self.incomplete_line,
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// save completed lines
|
||||
self.queue.append(&mut lines);
|
||||
self.queue.extend(lines.map(String::from));
|
||||
|
||||
if !val.ends_with('\n') {
|
||||
// incomplete line, save for next time
|
||||
// if `val` and `incomplete_line` were empty,
|
||||
// then pop will return none
|
||||
if let Some(s) = self.queue.pop_back() {
|
||||
self.incomplete_line = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Propagate errors by explicitly matching them before the final case.
|
||||
Value::Error { error, .. } => return Some(Err(*error)),
|
||||
|
@ -256,7 +212,7 @@ impl RawStreamLinesAdapter {
|
|||
span,
|
||||
skip_empty,
|
||||
incomplete_line: String::new(),
|
||||
queue: Vec::<String>::new(),
|
||||
queue: VecDeque::new(),
|
||||
inner_complete: false,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
use std::fmt::{Display, LowerExp};
|
||||
use std::io;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::num::FpCategory;
|
||||
|
||||
use super::error::{Error, ErrorCode, Result};
|
||||
|
@ -1034,20 +1033,7 @@ where
|
|||
T: ser::Serialize,
|
||||
{
|
||||
let vec = to_vec(value)?;
|
||||
let string = remove_json_whitespace(vec);
|
||||
Ok(string)
|
||||
}
|
||||
|
||||
fn remove_json_whitespace(v: Vec<u8>) -> String {
|
||||
let reader = BufReader::new(&v[..]);
|
||||
let mut output = String::new();
|
||||
for line in reader.lines() {
|
||||
match line {
|
||||
Ok(line) => output.push_str(line.trim().trim_end()),
|
||||
_ => {
|
||||
eprintln!("Error removing JSON whitespace");
|
||||
}
|
||||
}
|
||||
}
|
||||
output
|
||||
let string = String::from_utf8(vec)?;
|
||||
let output = string.lines().map(str::trim).collect();
|
||||
Ok(output)
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue