mirror of
https://github.com/nushell/nushell
synced 2024-12-27 05:23:11 +00:00
Handle mixed LF+CRLF in lines
(#7316)
This closes #4989. Previously `lines` was unable to handle text input with CRLF line breaks _and_ LF line breaks. ### Before: ![image](https://user-images.githubusercontent.com/26268125/205207685-b25da9e1-19fa-4abb-8ab2-0dd216c63fc0.png) ### After: ![image](https://user-images.githubusercontent.com/26268125/205207808-9f687242-a8c2-4b79-a12c-38b0583d8d52.png)
This commit is contained in:
parent
3ac36879e0
commit
ee5a387300
4 changed files with 32 additions and 17 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2638,6 +2638,7 @@ dependencies = [
|
|||
"rand 0.8.5",
|
||||
"rayon",
|
||||
"reedline",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"roxmltree",
|
||||
"rstest",
|
||||
|
|
|
@ -68,6 +68,7 @@ powierza-coefficient = "1.0.1"
|
|||
quick-xml = "0.25"
|
||||
rand = "0.8"
|
||||
rayon = "1.5.1"
|
||||
regex = "1.6.0"
|
||||
reqwest = {version = "0.11", features = ["blocking", "json"] }
|
||||
roxmltree = "0.16.0"
|
||||
rust-embed = "6.3.0"
|
||||
|
|
|
@ -4,6 +4,10 @@ use nu_protocol::{
|
|||
Category, Example, IntoInterruptiblePipelineData, PipelineData, RawStream, ShellError,
|
||||
Signature, Span, Type, Value,
|
||||
};
|
||||
use once_cell::sync::Lazy;
|
||||
// regex can be replaced with fancy-regex once it suppports `split()`
|
||||
// https://github.com/fancy-regex/fancy-regex/issues/104
|
||||
use regex::Regex;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Lines;
|
||||
|
@ -34,16 +38,18 @@ impl Command for Lines {
|
|||
let head = call.head;
|
||||
let ctrlc = engine_state.ctrlc.clone();
|
||||
let skip_empty = call.has_flag("skip-empty");
|
||||
|
||||
// match \r\n or \n
|
||||
static LINE_BREAK_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"\r\n|\n").expect("unable to compile regex"));
|
||||
match input {
|
||||
#[allow(clippy::needless_collect)]
|
||||
// Collect is needed because the string may not live long enough for
|
||||
// the Rc structure to continue using it. If split could take ownership
|
||||
// of the split values, then this wouldn't be needed
|
||||
PipelineData::Value(Value::String { val, span }, ..) => {
|
||||
let split_char = if val.contains("\r\n") { "\r\n" } else { "\n" };
|
||||
|
||||
let mut lines = val
|
||||
.split(split_char)
|
||||
let mut lines = LINE_BREAK_REGEX
|
||||
.split(&val)
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
|
@ -66,18 +72,12 @@ impl Command for Lines {
|
|||
Ok(iter.into_pipeline_data(engine_state.ctrlc.clone()))
|
||||
}
|
||||
PipelineData::ListStream(stream, ..) => {
|
||||
let mut split_char = "\n";
|
||||
|
||||
let iter = stream
|
||||
.into_iter()
|
||||
.filter_map(move |value| {
|
||||
if let Value::String { val, span } = value {
|
||||
if split_char != "\r\n" && val.contains("\r\n") {
|
||||
split_char = "\r\n";
|
||||
}
|
||||
|
||||
let mut lines = val
|
||||
.split(split_char)
|
||||
let mut lines = LINE_BREAK_REGEX
|
||||
.split(&val)
|
||||
.filter_map(|s| {
|
||||
if skip_empty && s.trim().is_empty() {
|
||||
None
|
||||
|
@ -153,6 +153,9 @@ impl Iterator for RawStreamLinesAdapter {
|
|||
type Item = Result<Value, ShellError>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
static LINE_BREAK_REGEX: Lazy<Regex> =
|
||||
Lazy::new(|| Regex::new(r"\r\n|\n").expect("unable to compile regex"));
|
||||
|
||||
loop {
|
||||
if !self.queue.is_empty() {
|
||||
let s = self.queue.remove(0usize);
|
||||
|
@ -188,11 +191,8 @@ impl Iterator for RawStreamLinesAdapter {
|
|||
Value::String { val, span } => {
|
||||
self.span = span;
|
||||
|
||||
let split_char =
|
||||
if val.contains("\r\n") { "\r\n" } else { "\n" };
|
||||
|
||||
let mut lines = val
|
||||
.split(split_char)
|
||||
let mut lines = LINE_BREAK_REGEX
|
||||
.split(&val)
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
|
|
@ -48,3 +48,16 @@ fn lines_multi_value_split() {
|
|||
|
||||
assert_eq!(actual.out, "6");
|
||||
}
|
||||
|
||||
/// test whether this handles CRLF and LF in the same input
|
||||
#[test]
|
||||
fn lines_mixed_line_endings() {
|
||||
let actual = nu!(
|
||||
cwd: "tests/fixtures/formats", pipeline(
|
||||
r#"
|
||||
"foo\nbar\r\nquux" | lines | length
|
||||
"#
|
||||
));
|
||||
|
||||
assert_eq!(actual.out, "3");
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue