add raw-string literal support (#9956)

# Description

This PR adds raw string support by using `r#` at the beginning of single
quoted strings and `#` at the end.

Notice that escapes are not processed, even within single quotes;
parentheses don't mean anything, and $variables don't mean anything.
It's just a string.
```nushell
❯ echo r#'one\ntwo (blah) ($var)'#
one\ntwo (blah) ($var)
```
Notice how they work without `echo` or `print` and how they work without
carriage returns.
```nushell
❯ r#'adsfa'#
adsfa
❯ r##'asdfa'@qpejq'##
asdfa'@qpejq
❯ r#'asdfasdfasf
∙ foqwejfqo@'23rfjqf'#
```
They also have a special configurable color in the repl. (use single
quotes though)

![image](https://github.com/nushell/nushell/assets/343840/8780e21d-de4c-45b3-9880-2425f5fe10ef)

They should work like rust raw literals and allow `r##`, `r###`,
`r####`, etc, to help with having one or many `#`'s in the middle of
your raw-string.

They should work with `let` as well.

```nushell
r#'some\nraw\nstring'# | str upcase
```

closes https://github.com/nushell/nushell/issues/5091
# User-Facing Changes
<!-- List of all changes that impact the user experience here. This
helps us keep track of breaking changes. -->

# Tests + Formatting
<!--
Don't forget to add tests that cover your changes.

Make sure you've run and fixed any issues with these commands:

- `cargo fmt --all -- --check` to check standard code formatting (`cargo
fmt --all` applies these changes)
- `cargo clippy --workspace -- -D warnings -D clippy::unwrap_used -A
clippy::needless_collect -A clippy::result_large_err` to check that
you're using the standard code style
- `cargo test --workspace` to check that all tests pass
- `cargo run -- -c "use std testing; testing run-tests --path
crates/nu-std"` to run the tests for the standard library

> **Note**
> from `nushell` you can also use the `toolkit` as follows
> ```bash
> use toolkit.nu # or use an `env_change` hook to activate it
automatically
> toolkit check pr
> ```
-->

# After Submitting
<!-- If your PR had any user-facing changes, update [the
documentation](https://github.com/nushell/nushell.github.io) after the
PR is merged, if necessary. This will help us keep the docs up to date.
-->

---------

Co-authored-by: WindSoilder <WindSoilder@outlook.com>
Co-authored-by: Ian Manske <ian.manske@pm.me>
This commit is contained in:
Darren Schroeder 2024-05-02 09:36:37 -04:00 committed by GitHub
parent b5741ef14b
commit 8ed0d84d6a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 241 additions and 3 deletions

View file

@ -128,6 +128,7 @@ impl Highlighter for NuHighlighter {
FlatShape::Operator => add_colored_token(&shape.1, next_token),
FlatShape::Signature => add_colored_token(&shape.1, next_token),
FlatShape::String => add_colored_token(&shape.1, next_token),
FlatShape::RawString => add_colored_token(&shape.1, next_token),
FlatShape::StringInterpolation => add_colored_token(&shape.1, next_token),
FlatShape::DateTime => add_colored_token(&shape.1, next_token),
FlatShape::List => {
@ -353,6 +354,7 @@ fn find_matching_block_end_in_expr(
Expr::Directory(_, _) => None,
Expr::GlobPattern(_, _) => None,
Expr::String(_) => None,
Expr::RawString(_) => None,
Expr::CellPath(_) => None,
Expr::ImportPattern(_) => None,
Expr::Overlay(_) => None,

View file

@ -32,6 +32,7 @@ pub fn default_shape_color(shape: String) -> Style {
"shape_or" => Style::new().fg(Color::Purple).bold(),
"shape_pipe" => Style::new().fg(Color::Purple).bold(),
"shape_range" => Style::new().fg(Color::Yellow).bold(),
"shape_raw_string" => Style::new().fg(Color::LightMagenta).bold(),
"shape_record" => Style::new().fg(Color::Cyan).bold(),
"shape_redirection" => Style::new().fg(Color::Purple).bold(),
"shape_signature" => Style::new().fg(Color::Green).bold(),

View file

@ -91,3 +91,18 @@ fn let_glob_type() {
let actual = nu!("let x: glob = 'aa'; $x | describe");
assert_eq!(actual.out, "glob");
}
// Checks that raw string literals can be bound with `let` and read back
// unchanged, for one, two and three `#` delimiters.
#[test]
fn let_raw_string() {
// One `#`: embedded double and single quotes are kept verbatim.
let actual = nu!(r#"let x = r#'abcde""fghi"''''jkl'#; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"''''jkl"#);
// Two `#`: a `'#` sequence inside the body does not terminate the string.
let actual = nu!(r#"let x = r##'abcde""fghi"''''#jkl'##; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"''''#jkl"#);
// Three `#`: both `'##` and `'#` may appear inside the body.
let actual = nu!(r#"let x = r###'abcde""fghi"'''##'#jkl'###; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"'''##'#jkl"#);
// Simplest case: a plain body with no quotes or `#` inside.
let actual = nu!(r#"let x = r#'abc'#; $x"#);
assert_eq!(actual.out, "abc");
}

View file

@ -125,3 +125,18 @@ fn mut_glob_type() {
let actual = nu!("mut x: glob = 'aa'; $x | describe");
assert_eq!(actual.out, "glob");
}
// Checks that raw string literals can be bound with `mut` and read back
// unchanged, for one, two and three `#` delimiters.
#[test]
fn mut_raw_string() {
// One `#`: embedded double and single quotes are kept verbatim.
let actual = nu!(r#"mut x = r#'abcde""fghi"''''jkl'#; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"''''jkl"#);
// Two `#`: a `'#` sequence inside the body does not terminate the string.
let actual = nu!(r#"mut x = r##'abcde""fghi"''''#jkl'##; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"''''#jkl"#);
// Three `#`: both `'##` and `'#` may appear inside the body.
let actual = nu!(r#"mut x = r###'abcde""fghi"'''##'#jkl'###; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"'''##'#jkl"#);
// Simplest case: a plain body with no quotes or `#` inside.
let actual = nu!(r#"mut x = r#'abc'#; $x"#);
assert_eq!(actual.out, "abc");
}

View file

@ -38,6 +38,7 @@ pub enum FlatShape {
Or,
Pipe,
Range,
RawString,
Record,
Redirection,
Signature,
@ -78,6 +79,7 @@ impl Display for FlatShape {
FlatShape::Or => write!(f, "shape_or"),
FlatShape::Pipe => write!(f, "shape_pipe"),
FlatShape::Range => write!(f, "shape_range"),
FlatShape::RawString => write!(f, "shape_raw_string"),
FlatShape::Record => write!(f, "shape_record"),
FlatShape::Redirection => write!(f, "shape_redirection"),
FlatShape::Signature => write!(f, "shape_signature"),
@ -509,6 +511,9 @@ pub fn flatten_expression(
Expr::String(_) => {
vec![(expr.span, FlatShape::String)]
}
Expr::RawString(_) => {
vec![(expr.span, FlatShape::RawString)]
}
Expr::Table(table) => {
let outer_span = expr.span;
let mut last_end = outer_span.start;

View file

@ -503,6 +503,79 @@ fn lex_internal(
} else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
// If the next character is non-newline whitespace, skip it.
curr_offset += 1;
} else if c == b'r' {
// A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#`
// If the next character is `#` we're probably looking at a raw string literal
// so we need to read all the text until we find a closing `#`. This raw string
// can contain any character, including newlines and double quotes without needing
// to escape them.
//
// A raw string can use several `#` characters as its prefix,
// in case there is a `'#` or `#'` in the string itself.
// E.g: r##'I can use '#' in a raw string'##
let mut prefix_sharp_cnt = 0;
let start = curr_offset;
while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
prefix_sharp_cnt += 1;
}
if prefix_sharp_cnt != 0 {
// curr_offset is the character `r`, we need to move forward and skip all `#`
// characters.
//
// e.g: r###'<body>
// ^
// ^
// curr_offset
curr_offset += prefix_sharp_cnt + 1;
// the next one should be a single quote.
if input.get(curr_offset) != Some(&b'\'') {
error = Some(ParseError::Expected(
"'",
Span::new(span_offset + curr_offset, span_offset + curr_offset + 1),
));
}
curr_offset += 1;
let mut matches = false;
while let Some(ch) = input.get(curr_offset) {
// check for postfix '###
if *ch == b'#' {
let start_ch = input[curr_offset - prefix_sharp_cnt];
let postfix = &input[curr_offset - prefix_sharp_cnt + 1..=curr_offset];
if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') {
matches = true;
curr_offset += 1;
break;
}
}
curr_offset += 1
}
if matches {
output.push(Token::new(
TokenContents::Item,
Span::new(span_offset + start, span_offset + curr_offset),
));
} else if error.is_none() {
error = Some(ParseError::UnexpectedEof(
"#".to_string(),
Span::new(span_offset + curr_offset, span_offset + curr_offset),
))
}
} else {
let (token, err) = lex_item(
input,
&mut curr_offset,
span_offset,
additional_whitespace,
special_tokens,
in_signature,
);
if error.is_none() {
error = err;
}
output.push(token);
}
} else {
let token = try_lex_special_piped_item(input, &mut curr_offset, span_offset);
if let Some(token) = token {

View file

@ -3341,6 +3341,7 @@ pub fn parse_mut(working_set: &mut StateWorkingSet, spans: &[Span]) -> Pipeline
}
pub fn parse_source(working_set: &mut StateWorkingSet, lite_command: &LiteCommand) -> Pipeline {
trace!("parsing source");
let spans = &lite_command.parts;
let name = working_set.get_span_contents(spans[0]);

View file

@ -66,6 +66,11 @@ pub fn is_math_expression_like(working_set: &mut StateWorkingSet, span: Span) ->
let b = bytes[0];
// check for raw string
if bytes.starts_with(b"r#") {
return true;
}
if b == b'(' || b == b'{' || b == b'[' || b == b'$' || b == b'"' || b == b'\'' || b == b'-' {
return true;
}
@ -578,6 +583,7 @@ pub fn parse_multispan_value(
spans_idx: &mut usize,
shape: &SyntaxShape,
) -> Expression {
trace!("parse multispan value");
match shape {
SyntaxShape::VarWithOptType => {
trace!("parsing: var with opt type");
@ -1565,6 +1571,66 @@ pub(crate) fn parse_dollar_expr(working_set: &mut StateWorkingSet, span: Span) -
}
}
/// Parses a raw string literal such as `r#'text'#` or `r##'te'#xt'##`.
///
/// The span must start with `r`, followed by one or more `#`, a single
/// quote, the body, a closing single quote and the same number of `#`.
/// The body is taken verbatim (no escape processing) and must be valid
/// UTF-8. On any malformed input a parse error is recorded on
/// `working_set` and a garbage expression covering `span` is returned.
pub fn parse_raw_string(working_set: &mut StateWorkingSet, span: Span) -> Expression {
    trace!("parsing: raw-string, with required delimiters");

    let contents = working_set.get_span_contents(span);

    // Guard: every raw string begins with `r` and at least one `#`.
    if !contents.starts_with(b"r#") {
        working_set.error(ParseError::Expected("r#", span));
        return garbage(span);
    }

    // Count the run of `#` bytes that directly follows the leading `r`.
    let hash_count = contents[1..].iter().take_while(|b| **b == b'#').count();
    let total_len = contents.len();

    // Shortest well-formed literal: `r` + hashes + `'` + `'` + hashes.
    // This check also keeps all of the index arithmetic below in bounds.
    if total_len < 2 * hash_count + 3 {
        working_set.error(ParseError::Unclosed('\''.into(), span));
        return garbage(span);
    }

    let opening_quote_idx = hash_count + 1;
    let closing_quote_idx = total_len - hash_count - 1;

    // The trailing bytes must be exactly as many `#` as the prefix had.
    let trailing = &contents[closing_quote_idx + 1..];
    if !trailing.iter().all(|b| *b == b'#') {
        working_set.error(ParseError::Unbalanced(
            "prefix #".to_string(),
            "postfix #".to_string(),
            span,
        ));
        return garbage(span);
    }

    // Both delimiters adjacent to the `#` runs must be single quotes.
    if contents[opening_quote_idx] != b'\'' || contents[closing_quote_idx] != b'\'' {
        working_set.error(ParseError::Unclosed('\''.into(), span));
        return garbage(span);
    }

    // Everything strictly between the two quotes is the string value.
    let body = &contents[opening_quote_idx + 1..closing_quote_idx];
    match String::from_utf8(body.to_vec()) {
        Ok(token) => Expression {
            expr: Expr::RawString(token),
            span,
            ty: Type::String,
            custom_completion: None,
        },
        Err(_) => {
            working_set.error(ParseError::Expected("utf8 raw-string", span));
            garbage(span)
        }
    }
}
pub fn parse_paren_expr(
working_set: &mut StateWorkingSet,
span: Span,
@ -4553,6 +4619,9 @@ pub fn parse_value(
return Expression::garbage(span);
}
},
b'r' if bytes.len() > 1 && bytes[1] == b'#' => {
return parse_raw_string(working_set, span);
}
_ => {}
}
@ -6075,6 +6144,7 @@ pub fn discover_captures_in_expr(
}
}
Expr::String(_) => {}
Expr::RawString(_) => {}
Expr::StringInterpolation(exprs) => {
for expr in exprs {
discover_captures_in_expr(working_set, expr, seen, seen_blocks, output)?;
@ -6236,6 +6306,7 @@ pub fn parse(
contents: &[u8],
scoped: bool,
) -> Arc<Block> {
trace!("parse");
let name = match fname {
Some(fname) => {
// use the canonical name for this filename
@ -6253,9 +6324,13 @@ pub fn parse(
let mut output = {
if let Some(block) = previously_parsed_block {
// dbg!("previous block");
return block;
} else {
// dbg!("starting lex");
let (output, err) = lex(contents, new_span.start, &[], &[], false);
// dbg!("finished lex");
// dbg!(&output);
if let Some(err) = err {
working_set.error(err)
}

View file

@ -36,6 +36,7 @@ pub enum Expr {
Directory(String, bool),
GlobPattern(String, bool),
String(String),
RawString(String),
CellPath(CellPath),
FullCellPath(Box<FullCellPath>),
ImportPattern(Box<ImportPattern>),
@ -80,6 +81,7 @@ impl Expr {
| Expr::ValueWithUnit(_)
| Expr::DateTime(_)
| Expr::String(_)
| Expr::RawString(_)
| Expr::CellPath(_)
| Expr::StringInterpolation(_)
| Expr::Nothing => {

View file

@ -279,6 +279,7 @@ impl Expression {
}
Expr::Signature(_) => false,
Expr::String(_) => false,
Expr::RawString(_) => false,
Expr::RowCondition(block_id) | Expr::Subexpression(block_id) => {
let block = working_set.get_block(*block_id);
@ -436,6 +437,7 @@ impl Expression {
}
Expr::Signature(_) => {}
Expr::String(_) => {}
Expr::RawString(_) => {}
Expr::StringInterpolation(items) => {
for i in items {
i.replace_span(working_set, replaced, new_span)

View file

@ -253,7 +253,7 @@ fn expr_to_string(engine_state: &EngineState, expr: &Expr) -> String {
Expr::Record(_) => "record".to_string(),
Expr::RowCondition(_) => "row condition".to_string(),
Expr::Signature(_) => "signature".to_string(),
Expr::String(_) => "string".to_string(),
Expr::String(_) | Expr::RawString(_) => "string".to_string(),
Expr::StringInterpolation(_) => "string interpolation".to_string(),
Expr::Subexpression(_) => "subexpression".to_string(),
Expr::Table(_) => "table".to_string(),

View file

@ -139,7 +139,7 @@ pub trait Eval {
Ok(Value::list(output_rows, expr.span))
}
Expr::Keyword(kw) => Self::eval::<D>(state, mut_state, &kw.expr),
Expr::String(s) => Ok(Value::string(s.clone(), expr.span)),
Expr::String(s) | Expr::RawString(s) => Ok(Value::string(s.clone(), expr.span)),
Expr::Nothing => Ok(Value::nothing(expr.span)),
Expr::ValueWithUnit(value) => match Self::eval::<D>(state, mut_state, &value.expr)? {
Value::Int { val, .. } => value.unit.item.build_value(val, value.unit.span),

View file

@ -69,6 +69,7 @@ let dark_theme = {
shape_table: blue_bold
shape_variable: purple
shape_vardecl: purple
shape_raw_string: light_purple
}
let light_theme = {
@ -134,6 +135,7 @@ let light_theme = {
shape_table: blue_bold
shape_variable: purple
shape_vardecl: purple
shape_raw_string: light_purple
}
# External completer example

View file

@ -319,7 +319,7 @@ fn convert_to_value(
msg: "signatures not supported in nuon".into(),
span: expr.span,
}),
Expr::String(s) => Ok(Value::string(s, span)),
Expr::String(s) | Expr::RawString(s) => Ok(Value::string(s, span)),
Expr::StringInterpolation(..) => Err(ShellError::OutsideSpannedLabeledError {
src: original_text.to_string(),
error: "Error when loading".into(),

View file

@ -569,6 +569,16 @@ pub fn hover(engine_state: &mut EngineState, file_path: &str, location: &Value)
}
})
),
FlatShape::RawString => println!(
"{}",
json!({
"hover": "raw-string",
"span": {
"start": span.start - offset,
"end": span.end - offset
}
})
),
FlatShape::StringInterpolation => println!(
"{}",
json!({

View file

@ -71,3 +71,23 @@ fn case_insensitive_sort_columns() -> TestResult {
r#"[{"version":"four","package":"abc"},{"version":"three","package":"abc"},{"version":"two","package":"Abc"}]"#,
)
}
// Checks that raw string literals evaluate to their body verbatim, for
// one to three `#` delimiters, an empty body, and bodies containing `#`.
#[test]
fn raw_string() -> TestResult {
// One `#`: embedded double and single quotes are preserved.
run_test(r#"r#'abcde""fghi"''''jkl'#"#, r#"abcde""fghi"''''jkl"#)?;
// Two `#`: an inner `'#` does not close the string.
run_test(r#"r##'abcde""fghi"''''#jkl'##"#, r#"abcde""fghi"''''#jkl"#)?;
// Three `#`: inner `'##` and `'#` do not close the string.
run_test(
r#"r###'abcde""fghi"'''##'#jkl'###"#,
r#"abcde""fghi"'''##'#jkl"#,
)?;
// Empty body.
run_test("r#''#", "")?;
// A `#` in the middle of the body and one right before the closing quote.
run_test(
r#"r#'a string with sharp inside # and ends with #'#"#,
"a string with sharp inside # and ends with #",
)
}
// A `r#` prefix that is not followed by a single quote must be rejected.
// NOTE(review): presumably `fail_test` checks that the error output contains
// the given text ("expected '") — confirm against the helper's definition.
#[test]
fn incomplete_raw_string() -> TestResult {
fail_test("r#abc", "expected '")
}

View file

@ -400,3 +400,18 @@ fn const_glob_type() {
let actual = nu!("const x: glob = 'aa'; $x | describe");
assert_eq!(actual.out, "glob");
}
// Checks that raw string literals can be bound with `const` and read back
// unchanged, for one, two and three `#` delimiters.
#[test]
fn const_raw_string() {
// One `#`: embedded double and single quotes are kept verbatim.
let actual = nu!(r#"const x = r#'abcde""fghi"''''jkl'#; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"''''jkl"#);
// Two `#`: a `'#` sequence inside the body does not terminate the string.
let actual = nu!(r#"const x = r##'abcde""fghi"''''#jkl'##; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"''''#jkl"#);
// Three `#`: both `'##` and `'#` may appear inside the body.
let actual = nu!(r#"const x = r###'abcde""fghi"'''##'#jkl'###; $x"#);
assert_eq!(actual.out, r#"abcde""fghi"'''##'#jkl"#);
// Simplest case: a plain body with no quotes or `#` inside.
let actual = nu!(r#"const x = r#'abc'#; $x"#);
assert_eq!(actual.out, "abc");
}